From bd1bf9d5de377be7a527393ad45452f020ed804d Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Mon, 22 Jun 2026 11:51:51 +0200 Subject: [PATCH 1/3] [Tile] Disable tile mode for NVCC 13.3 (#9488) --- libcudacxx/include/cuda/std/__cccl/compiler.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libcudacxx/include/cuda/std/__cccl/compiler.h b/libcudacxx/include/cuda/std/__cccl/compiler.h index f37dbec1d56..da41e2f18b0 100644 --- a/libcudacxx/include/cuda/std/__cccl/compiler.h +++ b/libcudacxx/include/cuda/std/__cccl/compiler.h @@ -111,12 +111,6 @@ # define _CCCL_CUDA_COMPILATION() 0 #endif // ^^^ not compiling .cu file ^^^ -#ifdef __CUDACC_TILE__ -# define _CCCL_TILE_COMPILATION() 1 -#else // ^^^ compiling .cu file in tile mode ^^^ / vvv not compiling in tile mode vvv -# define _CCCL_TILE_COMPILATION() 0 -#endif // ^^^ not compiling .cu file ^^^ - // The CUDA compiler version shares the implementation with the C++ compiler #define _CCCL_CUDA_COMPILER_MAKE_VERSION(_MAJOR, _MINOR) _CCCL_COMPILER_MAKE_VERSION(_MAJOR, _MINOR) #define _CCCL_CUDA_COMPILER(...) _CCCL_VERSION_COMPARE(_CCCL_CUDA_COMPILER_, _CCCL_CUDA_COMPILER_##__VA_ARGS__) @@ -157,6 +151,12 @@ # define _CCCL_DEVICE_COMPILATION() 0 #endif // ^^^ not compiling device code ^^^ +#if defined(__CUDACC_TILE__) && _CCCL_CUDA_COMPILER(NVCC, >, 13, 3) +# define _CCCL_TILE_COMPILATION() 1 +#else // ^^^ compiling .cu file in tile mode ^^^ / vvv not compiling in tile mode vvv +# define _CCCL_TILE_COMPILATION() 0 +#endif // ^^^ not compiling .cu file ^^^ + #define _CCCL_CUDACC_MAKE_VERSION(_MAJOR, _MINOR) ((_MAJOR) * 1000 + (_MINOR) * 10) // clang-cuda does not define __CUDACC_VER_MAJOR__ and friends. They are instead retrieved from the CUDA_VERSION macro From ebf0233c36a7e8705d6d6b69860bfbf140fc328f Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Mon, 22 Jun 2026 11:52:33 +0200 Subject: [PATCH 2/3] [Tile] Mark alignment helpers as `_CCCL_HOST_DEVICE_API` (#9487) * [Tile] Mark alignment helpers as unsupported in tile mode Currently tile does not support `__builtin_assume_aligned` Previously we would just disable the whole codepath but with added support for `__builtin_is_constant_evaluated` it became active again. Rather than disabling it for all of CCCL with tile mode, we mark those functions that use the builtin as `_CCCL_HOST_DEVICE` * [Tile] Fix various tests * [Tile] Do not compile out device only function it is otherwise not visible --- libcudacxx/include/cuda/__memory/align_down.h | 2 +- libcudacxx/include/cuda/__memory/align_up.h | 2 +- libcudacxx/include/cuda/__memory/ptr_rebind.h | 8 ++++---- libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h | 5 +++-- libcudacxx/include/cuda/std/__memory/align.h | 2 +- libcudacxx/include/cuda/std/__memory/assume_aligned.h | 2 +- .../include/cuda/std/__memory/runtime_assume_aligned.h | 3 ++- .../cuda/functional/proclaim_return_type.pass.cpp | 6 +++--- .../test/libcudacxx/cuda/memory/align_down.pass.cpp | 2 +- libcudacxx/test/libcudacxx/cuda/memory/align_up.pass.cpp | 2 +- .../test/libcudacxx/cuda/memory/ptr_rebind.pass.cpp | 2 +- .../cuda/utilities/expected/device_only_types.pass.cpp | 4 ++-- .../expected/expected.void/device_only_types.pass.cpp | 4 ++-- .../cuda/utilities/optional/device_only_types.pass.cpp | 8 ++++++++ .../cuda/utilities/tuple/device_only_types.pass.cpp | 8 ++++++++ .../cuda/utilities/unexpected/device_only_types.pass.cpp | 8 ++++++++ .../utilities/utility/pair/device_only_types.pass.cpp | 8 ++++++++ .../cuda/utilities/variant/device_only_types.pass.cpp | 8 ++++++++ .../libcxx/macros/extended_data_types.fp8.fail.cpp | 2 +- .../floating.point/device_fp128_functions.pass.cpp | 1 + .../alg.modifying/alg.copy/copy_backward.pass.cpp | 4 ++-- .../std/algorithms/alg.modifying/alg.copy/copy_n.pass.cpp | 4 ++-- .../algorithms/alg.modifying/alg.copy/copy_rand.pass.cpp | 4 ++-- .../integral/16b_integral_ref.pass.cpp | 3 +++ .../mdspan.aligned_accessor/aligned_accessor.pass.cpp | 4 ++++ .../std/utilities/memory/ptr.align/align.pass.cpp | 2 +- .../utilities/memory/ptr.align/assume_aligned.pass.cpp | 3 +++ .../utilities/memory/ptr.align/assume_aligned.runfail.cpp | 6 +++++- 28 files changed, 87 insertions(+), 30 deletions(-) diff --git a/libcudacxx/include/cuda/__memory/align_down.h b/libcudacxx/include/cuda/__memory/align_down.h index 1f64af55f38..a9a2501f0ba 100644 --- a/libcudacxx/include/cuda/__memory/align_down.h +++ b/libcudacxx/include/cuda/__memory/align_down.h @@ -42,7 +42,7 @@ _CCCL_BEGIN_NAMESPACE_CUDA template -[[nodiscard]] _CCCL_API _Tp* align_down(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API _Tp* align_down(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept { using ::cuda::std::uintptr_t; _CCCL_ASSERT(::cuda::__is_valid_alignment<_Tp>(__alignment), "invalid alignment"); diff --git a/libcudacxx/include/cuda/__memory/align_up.h b/libcudacxx/include/cuda/__memory/align_up.h index 0a03965dd0a..81cc7fefba4 100644 --- a/libcudacxx/include/cuda/__memory/align_up.h +++ b/libcudacxx/include/cuda/__memory/align_up.h @@ -43,7 +43,7 @@ _CCCL_BEGIN_NAMESPACE_CUDA template -[[nodiscard]] _CCCL_API inline _Tp* align_up(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API inline _Tp* align_up(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept { using ::cuda::std::uintptr_t; _CCCL_ASSERT(::cuda::__is_valid_alignment<_Tp>(__alignment), "invalid alignment"); diff --git a/libcudacxx/include/cuda/__memory/ptr_rebind.h b/libcudacxx/include/cuda/__memory/ptr_rebind.h index 3d7babc3029..f9e4e8ccf32 100644 --- a/libcudacxx/include/cuda/__memory/ptr_rebind.h +++ b/libcudacxx/include/cuda/__memory/ptr_rebind.h @@ -31,7 +31,7 @@ _CCCL_BEGIN_NAMESPACE_CUDA template -[[nodiscard]] _CCCL_API _Up* ptr_rebind(_Tp* __ptr) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API _Up* ptr_rebind(_Tp* __ptr) noexcept { if constexpr (::cuda::std::is_same_v<_Up, _Tp>) // also handle _Tp == _Up == void { @@ -51,19 +51,19 @@ template } template -[[nodiscard]] _CCCL_API const _Up* ptr_rebind(const _Tp* __ptr) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API const _Up* ptr_rebind(const _Tp* __ptr) noexcept { return ::cuda::ptr_rebind(const_cast<_Tp*>(__ptr)); } template -[[nodiscard]] _CCCL_API volatile _Up* ptr_rebind(volatile _Tp* __ptr) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API volatile _Up* ptr_rebind(volatile _Tp* __ptr) noexcept { return ::cuda::ptr_rebind(const_cast<_Tp*>(__ptr)); } template -[[nodiscard]] _CCCL_API const volatile _Up* ptr_rebind(const volatile _Tp* __ptr) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API const volatile _Up* ptr_rebind(const volatile _Tp* __ptr) noexcept { return ::cuda::ptr_rebind(const_cast<_Tp*>(__ptr)); } diff --git a/libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h b/libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h index 35cac951f07..8e9c329f6c3 100644 --- a/libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h +++ b/libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h @@ -77,12 +77,13 @@ class aligned_accessor return {}; } - _CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept + _CCCL_HOST_DEVICE_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept { return ::cuda::std::assume_aligned(__p)[__i]; } - _CCCL_API constexpr typename offset_policy::data_handle_type offset(data_handle_type __p, size_t __i) const noexcept + _CCCL_HOST_DEVICE_API constexpr typename offset_policy::data_handle_type + offset(data_handle_type __p, size_t __i) const noexcept { return ::cuda::std::assume_aligned(__p) + __i; } diff --git a/libcudacxx/include/cuda/std/__memory/align.h b/libcudacxx/include/cuda/std/__memory/align.h index 678a4a266f3..482cdc1f5ca 100644 --- a/libcudacxx/include/cuda/std/__memory/align.h +++ b/libcudacxx/include/cuda/std/__memory/align.h @@ -35,7 +35,7 @@ _CCCL_DIAG_SUPPRESS_MSVC(4146) // unary minus operator applied to unsigned type, _CCCL_BEGIN_NAMESPACE_CUDA_STD -_CCCL_API inline void* align(size_t __alignment, size_t __size, void*& __ptr, size_t& __space) +_CCCL_HOST_DEVICE_API inline void* align(size_t __alignment, size_t __size, void*& __ptr, size_t& __space) { _CCCL_ASSERT(::cuda::__is_valid_alignment(__alignment), "cuda::std::align: invalid alignment"); if (__space < __size) diff --git a/libcudacxx/include/cuda/std/__memory/assume_aligned.h b/libcudacxx/include/cuda/std/__memory/assume_aligned.h index b667ee8fed4..443eb47114b 100644 --- a/libcudacxx/include/cuda/std/__memory/assume_aligned.h +++ b/libcudacxx/include/cuda/std/__memory/assume_aligned.h @@ -32,7 +32,7 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD template -[[nodiscard]] _CCCL_API constexpr _Tp* assume_aligned(_Tp* __ptr) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API constexpr _Tp* assume_aligned(_Tp* __ptr) noexcept { static_assert(::cuda::__is_valid_alignment<_Tp>(_Align), "invalid _Align value for _Tp"); #if !defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED) diff --git a/libcudacxx/include/cuda/std/__memory/runtime_assume_aligned.h b/libcudacxx/include/cuda/std/__memory/runtime_assume_aligned.h index 191ecb7530d..edfe21baaf6 100644 --- a/libcudacxx/include/cuda/std/__memory/runtime_assume_aligned.h +++ b/libcudacxx/include/cuda/std/__memory/runtime_assume_aligned.h @@ -29,7 +29,8 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD template -[[nodiscard]] _CCCL_API _Tp* __runtime_assume_aligned(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept +[[nodiscard]] _CCCL_HOST_DEVICE_API _Tp* +__runtime_assume_aligned(_Tp* __ptr, [[maybe_unused]] ::cuda::std::size_t __alignment) noexcept { #if defined(_CCCL_BUILTIN_ASSUME_ALIGNED) using _Up = remove_volatile_t<_Tp>; diff --git a/libcudacxx/test/libcudacxx/cuda/functional/proclaim_return_type.pass.cpp b/libcudacxx/test/libcudacxx/cuda/functional/proclaim_return_type.pass.cpp index efc29310038..9d4213e0940 100644 --- a/libcudacxx/test/libcudacxx/cuda/functional/proclaim_return_type.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/functional/proclaim_return_type.pass.cpp @@ -20,7 +20,7 @@ TEST_FUNC void test_lambda_return_type() { // Ensure type can be queried from cuda::std::invoke_result_t # if _CCCL_TILE_COMPILATION() - auto d_lm = [] _CCCL_TILE() -> ReturnT { + auto d_lm = [] TEST_TILE_FUNC() -> ReturnT { return ReturnT{}; }; # else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv @@ -96,11 +96,11 @@ struct h_callable struct d_callable { - TEST_DEVICE_FUNC int operator()() const& + TEST_TILE_DEVICE_FUNC int operator()() const& { return 42; } - TEST_DEVICE_FUNC int operator()() const&& + TEST_TILE_DEVICE_FUNC int operator()() const&& { return 42; } diff --git a/libcudacxx/test/libcudacxx/cuda/memory/align_down.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory/align_down.pass.cpp index 72d07e701fc..83c138dde9c 100644 --- a/libcudacxx/test/libcudacxx/cuda/memory/align_down.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/memory/align_down.pass.cpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: enable-tile -// error: asm statement is unsupported in tile code +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/memory/align_up.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory/align_up.pass.cpp index cbd584ff946..b846cd6a0af 100644 --- a/libcudacxx/test/libcudacxx/cuda/memory/align_up.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/memory/align_up.pass.cpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: enable-tile -// error: asm statement is unsupported in tile code +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/memory/ptr_rebind.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory/ptr_rebind.pass.cpp index 58af2302070..9be143a0f03 100644 --- a/libcudacxx/test/libcudacxx/cuda/memory/ptr_rebind.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/memory/ptr_rebind.pass.cpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: enable-tile -// error: asm statement is unsupported in tile code +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp index d6615a90ae7..55d51543317 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp @@ -17,7 +17,7 @@ #include "host_device_types.h" #include "test_macros.h" -#if _CCCL_DEVICE_COMPILATION() +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using expected = cuda::std::expected; @@ -193,7 +193,7 @@ TEST_DEVICE_FUNC void test() assert(rhs.error() == 1337); } } -#endif // _CCCL_DEVICE_COMPILATION() +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp index 133abb1e8d6..34b57f4b97f 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp @@ -16,7 +16,7 @@ #include "host_device_types.h" #include "test_macros.h" -#if _CCCL_DEVICE_COMPILATION() +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using expected = cuda::std::expected; @@ -157,7 +157,7 @@ TEST_DEVICE_FUNC void test() assert(rhs.error() == 1337); } } -#endif // _CCCL_DEVICE_COMPILATION() +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp index e14e9119ffe..c0cedbcf43f 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp @@ -17,6 +17,7 @@ #include "host_device_types.h" #include "test_macros.h" +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() template TEST_DEVICE_FUNC void test() { @@ -138,6 +139,13 @@ TEST_DEVICE_FUNC void test() test(); test(); } +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() + +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp index ae9389c121b..4c1d5992ad4 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp @@ -13,6 +13,7 @@ #include "host_device_types.h" #include "test_macros.h" +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using tuple = cuda::std::tuple; @@ -73,6 +74,13 @@ TEST_DEVICE_FUNC void test() assert(cuda::std::get<0>(rhs) == 1337); } } +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() + +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp index 89976d4fa45..50a9df393ce 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp @@ -13,6 +13,7 @@ #include "host_device_types.h" #include "test_macros.h" +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using unexpected = cuda::std::unexpected; @@ -74,6 +75,13 @@ TEST_DEVICE_FUNC void test() assert(rhs.error() == 1337); } } +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() + +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp index 23f6e9b1cfd..1a6f9413d7f 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp @@ -15,6 +15,7 @@ #include "host_device_types.h" #include "test_macros.h" +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using pair = cuda::std::pair; @@ -85,6 +86,13 @@ TEST_DEVICE_FUNC void test() assert(rhs.second == 42); } } +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() + +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp index fadfed0d211..e3444d6e965 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp @@ -13,6 +13,7 @@ #include "host_device_types.h" #include "test_macros.h" +#if _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() TEST_DEVICE_FUNC void test() { using variant = cuda::std::variant; @@ -112,6 +113,13 @@ TEST_DEVICE_FUNC void test() assert(cuda::std::get<0>(rhs) == 1337); } } +#endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() + +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} int main(int arg, char** argv) { diff --git a/libcudacxx/test/libcudacxx/libcxx/macros/extended_data_types.fp8.fail.cpp b/libcudacxx/test/libcudacxx/libcxx/macros/extended_data_types.fp8.fail.cpp index 7418e0030b7..1e2638df086 100644 --- a/libcudacxx/test/libcudacxx/libcxx/macros/extended_data_types.fp8.fail.cpp +++ b/libcudacxx/test/libcudacxx/libcxx/macros/extended_data_types.fp8.fail.cpp @@ -16,7 +16,7 @@ int main(int, char**) { -#if !_CCCL_HAS_NVFP8() +#if !_CCCL_HAS_NVFP8() && !_CCCL_TILE_COMPILATION() auto x1 = __nv_fp8_e4m3(1.0f); unused(x1); #else diff --git a/libcudacxx/test/libcudacxx/libcxx/numerics/floating.point/device_fp128_functions.pass.cpp b/libcudacxx/test/libcudacxx/libcxx/numerics/floating.point/device_fp128_functions.pass.cpp index cc1fcb3e959..b86eb503546 100644 --- a/libcudacxx/test/libcudacxx/libcxx/numerics/floating.point/device_fp128_functions.pass.cpp +++ b/libcudacxx/test/libcudacxx/libcxx/numerics/floating.point/device_fp128_functions.pass.cpp @@ -10,6 +10,7 @@ // ADDITIONAL_COMPILE_OPTIONS_HOST: -fext-numeric-literals // ADDITIONAL_COMPILE_DEFINITIONS: CCCL_GCC_HAS_EXTENDED_NUMERIC_LITERALS +// UNSUPPORTED: enable-tile #include #include diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_backward.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_backward.pass.cpp index 1fe744ba4f9..bc43d4e9dbe 100644 --- a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_backward.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_backward.pass.cpp @@ -174,9 +174,9 @@ TEST_CONSTEXPR_CXX20 TEST_FUNC bool test() #if !TEST_COMPILER(NVRTC) NV_IF_TARGET(NV_IS_HOST, (test>();)) #endif // !TEST_COMPILER(NVRTC) -#if TEST_CUDA_COMPILATION() +#if TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() NV_IF_TARGET(NV_IS_DEVICE, (test>();)) -#endif // TEST_CUDA_COMPILATION() +#endif // TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() return true; } diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_n.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_n.pass.cpp index 1fb9fdc850e..e4bf0cc4056 100644 --- a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_n.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_n.pass.cpp @@ -99,9 +99,9 @@ TEST_CONSTEXPR_CXX20 TEST_FUNC bool test() #if !TEST_COMPILER(NVRTC) NV_IF_TARGET(NV_IS_HOST, (test>();)) #endif // !TEST_COMPILER(NVRTC) -#if TEST_CUDA_COMPILATION() +#if TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() NV_IF_TARGET(NV_IS_DEVICE, (test>();)) -#endif // TEST_CUDA_COMPILATION() +#endif // TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() return true; } diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_rand.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_rand.pass.cpp index 68e9ce83e17..ab301e94a28 100644 --- a/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_rand.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/copy_rand.pass.cpp @@ -30,9 +30,9 @@ TEST_CONSTEXPR_CXX20 TEST_FUNC bool test() #if !TEST_COMPILER(NVRTC) NV_IF_TARGET(NV_IS_HOST, (test>();)) #endif // !TEST_COMPILER(NVRTC) -#if TEST_CUDA_COMPILATION() +#if TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() NV_IF_TARGET(NV_IS_DEVICE, (test>();)) -#endif // TEST_CUDA_COMPILATION() +#endif // TEST_CUDA_COMPILATION() && !_CCCL_TILE_COMPILATION() return true; } diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/16b_integral_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/16b_integral_ref.pass.cpp index 41ed18d02b2..f9d0cc4e6aa 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/16b_integral_ref.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/16b_integral_ref.pass.cpp @@ -10,6 +10,9 @@ // UNSUPPORTED: windows // UNSUPPORTED: aarch64-unknown-linux-gnu +// XFAIL: enable-tile +// error: asm statement is unsupported in tile code + // // template <> diff --git a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.aligned_accessor/aligned_accessor.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.aligned_accessor/aligned_accessor.pass.cpp index ea044833157..bf14f156edb 100644 --- a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.aligned_accessor/aligned_accessor.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.aligned_accessor/aligned_accessor.pass.cpp @@ -6,6 +6,10 @@ // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// + +// UNSUPPORTED: enable-tile +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" + #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp index 09bade11941..6795b7c0cae 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: enable-tile -// error: asm statement is unsupported in tile code +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" // #include diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.pass.cpp index 2d74151a5c8..ac9b9ab9339 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.pass.cpp @@ -7,6 +7,9 @@ // //===----------------------------------------------------------------------===// +// UNSUPPORTED: enable-tile +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" + // #include // template diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.runfail.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.runfail.cpp index 225473736e7..10dfceabeef 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.runfail.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.runfail.cpp @@ -6,7 +6,11 @@ // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// -#include "cuda/std/__memory/assume_aligned.h" + +// UNSUPPORTED: enable-tile +// nvbug6327166: error: Internal Compiler Error (tile codegen): "call to unknown tile builtin function!" + +#include #include From 990e7c85c2c19480771774c43c8912f1807c47ba Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Wed, 10 Jun 2026 08:57:53 +0200 Subject: [PATCH 3/3] [Tile] Improve testing of `__tile__` and `__device__` only functions (#9313) We were accidentally always testing `__tile__ __device__` functions, which avoided some issues that have been fixed in the ToT compiler. We need to ensure that we properly test `__device__` only functions in a `__device__ __tile__` feature --- .../include/cuda/std/__cccl/visibility.h | 2 + .../device_mdspan/index_operator.pass.cpp | 54 +++-- .../mdspan/shared_mem_mdspan/assign.pass.cpp | 14 +- .../shared_mem_mdspan/conversion.pass.cpp | 26 +-- .../shared_mem_mdspan/ctor.copy.pass.cpp | 14 +- .../shared_mem_mdspan/ctor.default.pass.cpp | 16 +- .../shared_mem_mdspan/ctor.dh_array.pass.cpp | 24 +- .../ctor.dh_extents.pass.cpp | 16 +- .../ctor.dh_integers.pass.cpp | 16 +- .../shared_mem_mdspan/ctor.dh_map.pass.cpp | 16 +- .../ctor.dh_map_acc.pass.cpp | 14 +- .../shared_mem_mdspan/ctor.dh_span.pass.cpp | 24 +- .../shared_mem_mdspan/ctor.move.pass.cpp | 14 +- .../shared_mem_mdspan/index_operator.pass.cpp | 46 ++-- .../mdspan/shared_mem_mdspan/move.pass.cpp | 14 +- .../mdspan/shared_mem_mdspan/swap.pass.cpp | 4 +- .../expected/device_only_types.pass.cpp | 15 +- .../expected.void/device_only_types.pass.cpp | 15 +- .../expected.void/tile_only_types.pass.cpp | 171 +++++++++++++++ .../expected/tile_only_types.pass.cpp | 207 ++++++++++++++++++ .../optional/device_only_types.pass.cpp | 9 +- .../optional/tile_only_types.pass.cpp | 153 +++++++++++++ .../tuple/device_only_types.pass.cpp | 9 +- .../utilities/tuple/tile_only_types.pass.cpp | 88 ++++++++ .../unexpected/device_only_types.pass.cpp | 9 +- .../unexpected/tile_only_types.pass.cpp | 89 ++++++++ .../utility/pair/device_only_types.pass.cpp | 11 +- .../utility/pair/tile_only_types.pass.cpp | 98 +++++++++ .../variant/device_only_types.pass.cpp | 9 +- .../variant/tile_only_types.pass.cpp | 127 +++++++++++ .../arithmetic.operations/divides.pass.cpp | 23 +- .../arithmetic.operations/minus.pass.cpp | 23 +- .../arithmetic.operations/modulus.pass.cpp | 23 +- .../arithmetic.operations/multiplies.pass.cpp | 23 +- .../arithmetic.operations/negate.pass.cpp | 23 +- .../arithmetic.operations/plus.pass.cpp | 23 +- .../bitwise.operations/bit_and.pass.cpp | 23 +- .../bitwise.operations/bit_not.pass.cpp | 23 +- .../bitwise.operations/bit_or.pass.cpp | 23 +- .../bitwise.operations/bit_xor.pass.cpp | 23 +- .../comparisons/equal_to.pass.cpp | 19 +- .../comparisons/greater.pass.cpp | 19 +- .../comparisons/greater_equal.pass.cpp | 19 +- .../comparisons/less.pass.cpp | 19 +- .../comparisons/less_equal.pass.cpp | 19 +- .../comparisons/not_equal_to.pass.cpp | 19 +- .../func.invoke/invoke.pass.cpp | 32 +++ .../logical.operations/logical_and.pass.cpp | 23 +- .../logical.operations/logical_not.pass.cpp | 23 +- .../logical.operations/logical_or.pass.cpp | 23 +- .../negators/binary_negate.pass.cpp | 22 +- .../negators/unary_negate.pass.cpp | 21 +- .../time.traits.duration_values/max.pass.cpp | 3 - libcudacxx/test/support/host_device_types.h | 98 +++++++-- libcudacxx/test/support/test_macros.h | 6 +- 55 files changed, 1706 insertions(+), 213 deletions(-) create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/expected/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/optional/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/tuple/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/unexpected/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/tile_only_types.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/utilities/variant/tile_only_types.pass.cpp diff --git a/libcudacxx/include/cuda/std/__cccl/visibility.h b/libcudacxx/include/cuda/std/__cccl/visibility.h index cfbf384d8ce..075a98130aa 100644 --- a/libcudacxx/include/cuda/std/__cccl/visibility.h +++ b/libcudacxx/include/cuda/std/__cccl/visibility.h @@ -114,11 +114,13 @@ # define _CCCL_HOST_DEVICE_API _CCCL_HOST_DEVICE # define _CCCL_HOST_API _CCCL_HOST # define _CCCL_DEVICE_API _CCCL_DEVICE +# define _CCCL_TILE_API _CCCL_TILE #else // ^^^ _CCCL_COMPILER(NVHPC) ^^^ / vvv !_CCCL_COMPILER(NVHPC) vvv # define _CCCL_API _CCCL_TILE _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # define _CCCL_HOST_DEVICE_API _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # define _CCCL_HOST_API _CCCL_HOST _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # define _CCCL_DEVICE_API _CCCL_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION +# define _CCCL_TILE_API _CCCL_TILE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION #endif // !_CCCL_COMPILER(NVHPC) //! @brief \c _CCCL_NODEBUG_API marks a function's visibility as hidden and causes diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/device_mdspan/index_operator.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/device_mdspan/index_operator.pass.cpp index 6c0e51ef52d..59c8643b79f 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/device_mdspan/index_operator.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/device_mdspan/index_operator.pass.cpp @@ -40,30 +40,26 @@ TEST_DIAG_SUPPRESS_GCC("-Wcomma-subscript") #endif // TEST_COMPILER(GCC, >=, 10) template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0) { return mds[i0]; } #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() -template < - class MDS, - class... Indices, - class = cuda::std::enable_if_t< - cuda::std::is_same_v()[cuda::std::declval()...]), typename MDS::reference>, - int> = 0> -TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs) +template + requires requires(MDS mds, Indices... indices) { mds[indices...]; } +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs) { unused(m[idxs...]); return true; } #else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv -template < - class MDS, - class Index, - class = cuda::std::enable_if_t()[cuda::std::declval()]), - typename MDS::reference>::value>> -TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) +template ()[cuda::std::declval()]), + typename MDS::reference>, + int> = 0> +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) { unused(m[idx]); return true; @@ -71,29 +67,29 @@ TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) #endif // !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() template -TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...) +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...) { return false; } #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds) { return mds[]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1) { return mds[i0, i1]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2) { return mds[i0, i1, i2]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3) { return mds[i0, i1, i2, i3]; } @@ -101,7 +97,7 @@ TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t // We must ensure that we do not try to access multiarg accessors template = 0> -TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) +TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) { int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(arg))); int* ptr2 = &access(mds, arg); @@ -109,7 +105,7 @@ TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) } template = 0> -TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) { #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...))); @@ -121,7 +117,7 @@ TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) } template = 0> -TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) { int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...))); assert_access(mds, args...); @@ -134,7 +130,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) } template = 0> -TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) { constexpr int r = static_cast(MDS::extents_type::rank()) - 1 - static_cast(sizeof...(Args)); for (typename MDS::index_type i = 0; i < mds.extents().extent(r); i++) @@ -144,7 +140,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) } template -TEST_DEVICE_FUNC constexpr void test_iteration(Mapping m) +TEST_TILE_DEVICE_FUNC constexpr void test_iteration(Mapping m) { cuda::std::array data{}; using MDS = cuda::device_mdspan; @@ -153,7 +149,7 @@ TEST_DEVICE_FUNC constexpr void test_iteration(Mapping m) } template -TEST_DEVICE_FUNC constexpr void test_layout() +TEST_TILE_DEVICE_FUNC constexpr void test_layout() { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_iteration(construct_mapping(Layout(), cuda::std::extents(1))); @@ -300,7 +296,7 @@ TEST_DEVICE_FUNC constexpr void test_layout() } template -TEST_DEVICE_FUNC constexpr void test_layout_large() +TEST_TILE_DEVICE_FUNC constexpr void test_layout_large() { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_iteration(construct_mapping(Layout(), cuda::std::extents(3, 5, 6))); @@ -309,9 +305,9 @@ TEST_DEVICE_FUNC constexpr void test_layout_large() // mdspan::operator[] casts to index_type before calling mapping // mapping requirements only require the index operator to mixed integer types not anything convertible to index_type -TEST_DEVICE_FUNC constexpr void test_index_cast_happens() {} +TEST_TILE_DEVICE_FUNC constexpr void test_index_cast_happens() {} -TEST_DEVICE_FUNC constexpr bool test() +TEST_TILE_DEVICE_FUNC constexpr bool test() { test_layout(); test_layout(); @@ -319,7 +315,7 @@ TEST_DEVICE_FUNC constexpr bool test() return true; } -TEST_DEVICE_FUNC void test_device() +TEST_TILE_DEVICE_FUNC void test_device() { test(); } diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/assign.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/assign.pass.cpp index 3de61196a68..d6a6bbff31c 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/assign.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/assign.pass.cpp @@ -22,7 +22,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -45,7 +45,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -57,7 +57,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { // make sure we test a trivially assignable mapping static_assert(cuda::std::is_trivially_assignable_v< @@ -73,7 +73,7 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; // make sure we test trivially constructible accessor and data_handle @@ -83,7 +83,7 @@ TEST_DEVICE_FUNC constexpr void mixin_accessor() } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; // make sure we test trivially constructible accessor and data_handle @@ -92,7 +92,7 @@ TEST_DEVICE_FUNC void mixin_accessor() mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -100,7 +100,7 @@ TEST_DEVICE_FUNC void test() mixin_accessor(); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/conversion.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/conversion.pass.cpp index 03c2d093512..00d3c18f82a 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/conversion.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/conversion.pass.cpp @@ -49,7 +49,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_implicit_conversion(ToMDS to_mds, FromMDS from_mds) +TEST_TILE_DEVICE_FUNC constexpr void test_implicit_conversion(ToMDS to_mds, FromMDS from_mds) { assert(to_mds.extents() == from_mds.extents()); test_equality_with_handle(to_mds, from_mds); @@ -68,7 +68,7 @@ template = 0> -TEST_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS) +TEST_TILE_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS) { static_assert(!cuda::std::is_constructible_v); } @@ -79,7 +79,7 @@ template = 0, cuda::std::enable_if_t = 0> -TEST_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS) +TEST_TILE_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS) {} template = 0, cuda::std::enable_if_t = 0, cuda::std::enable_if_t = 0> -TEST_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS from_mds) +TEST_TILE_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS from_mds) { ToMDS to_mds(from_mds); assert(to_mds.extents() == from_mds.extents()); @@ -106,7 +106,7 @@ template = 0, cuda::std::enable_if_t = 0, cuda::std::enable_if_t = 0> -TEST_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS from_mds) +TEST_TILE_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS from_mds) { ToMDS to_mds(from_mds); assert(to_mds.extents() == from_mds.extents()); @@ -117,7 +117,7 @@ TEST_DEVICE_FUNC constexpr void test_conversion_impl(FromMDS from_mds) } template -TEST_DEVICE_FUNC constexpr void test_conversion(FromMDS from_mds) +TEST_TILE_DEVICE_FUNC constexpr void test_conversion(FromMDS from_mds) { // check some requirements, to see we didn't screw up our test layouts/accessors static_assert(cuda::std::copyable); @@ -142,7 +142,7 @@ TEST_DEVICE_FUNC constexpr void test_conversion(FromMDS from_mds) } template -TEST_DEVICE_FUNC constexpr void +TEST_TILE_DEVICE_FUNC constexpr void construct_from_mds(const FromH& handle, const FromL& layout, const FromExt& exts, const FromA& acc) { using ToMDS = cuda::shared_memory_mdspan; @@ -151,7 +151,7 @@ construct_from_mds(const FromH& handle, const FromL& layout, const FromExt& exts } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const FromH& handle, const FromL& layout, const FromA& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const FromH& handle, const FromL& layout, const FromA& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; // constructible and convertible @@ -178,7 +178,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const FromH& handle, const FromL& } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const FromH& handle, const FromA& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const FromH& handle, const FromA& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -214,7 +214,7 @@ template && !cuda::std::is_same_v, int> = 0> -TEST_DEVICE_FUNC constexpr void test_impl(FromA from_acc) +TEST_TILE_DEVICE_FUNC constexpr void test_impl(FromA from_acc) { cuda::std::array elements = {42}; mixin_layout(typename FromA::data_handle_type(elements.data()), from_acc); @@ -225,7 +225,7 @@ template || cuda::std::is_same_v, int> = 0> -TEST_DEVICE_FUNC void test_impl(FromA from_acc) +TEST_TILE_DEVICE_FUNC void test_impl(FromA from_acc) { ElementPool elements; mixin_layout(typename FromA::data_handle_type(elements.get_ptr()), from_acc); @@ -241,7 +241,7 @@ template -TEST_DEVICE_FUNC void test(FromA from_acc) +TEST_TILE_DEVICE_FUNC void test(FromA from_acc) { static_assert(cuda::std::copyable); static_assert(cuda::std::copyable); @@ -261,7 +261,7 @@ TEST_DEVICE_FUNC void test(FromA from_acc) test_impl(from_acc); } -TEST_DEVICE_FUNC void run_conversion_tests() +TEST_TILE_DEVICE_FUNC void run_conversion_tests() { // using shorthands here: t and o for better visual distinguishability constexpr bool t = true; diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.copy.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.copy.pass.cpp index 257b569b3f6..20538dd311a 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.copy.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.copy.pass.cpp @@ -26,7 +26,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -44,7 +44,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -56,7 +56,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { // make sure we test a trivially copyable mapping static_assert( @@ -70,7 +70,7 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; // make sure we test trivially constructible accessor and data_handle @@ -80,7 +80,7 @@ TEST_DEVICE_FUNC constexpr void mixin_accessor() } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; // make sure we test trivially constructible accessor and data_handle @@ -89,7 +89,7 @@ TEST_DEVICE_FUNC void mixin_accessor() mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -97,7 +97,7 @@ TEST_DEVICE_FUNC void test() mixin_accessor(); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.default.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.default.pass.cpp index 95d26f75511..b502fef6b88 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.default.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.default.pass.cpp @@ -39,7 +39,7 @@ template 0) && hc && mc && ac, int> = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H&, const M&, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H&, const M&, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -64,7 +64,7 @@ template 0) && hc && mc && ac), int> = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H&, const M&, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H&, const M&, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -76,7 +76,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H&, const M&, const A&) } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -89,7 +89,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -102,20 +102,20 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -123,7 +123,7 @@ TEST_DEVICE_FUNC void test() mixin_accessor(); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_array.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_array.pass.cpp index 5eca0399d8e..d8fee204439 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_array.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_array.pass.cpp @@ -42,7 +42,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr auto array_from_extents(const Extents& exts, cuda::std::index_sequence) +TEST_TILE_DEVICE_FUNC constexpr auto array_from_extents(const Extents& exts, cuda::std::index_sequence) { return cuda::std::array{exts.extent(Idxs)...}; } @@ -60,7 +60,7 @@ template _CCCL_CONCEPT check_mdspan_ctor_implicit = decltype(check_implicit_construction_impl(0))::value; template -TEST_DEVICE_FUNC constexpr void +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor_array(const H& handle, const M& map, const A&, cuda::std::array exts) { using MDS = @@ -80,7 +80,7 @@ test_mdspan_ctor_array(const H& handle, const M& map, const A&, cuda::std::array } template 0), int> = 0> -TEST_DEVICE_FUNC constexpr cuda::std::array +TEST_TILE_DEVICE_FUNC constexpr cuda::std::array get_exts_dynamic(const cuda::std::array& exts) { cuda::std::array exts_dynamic{}; @@ -95,14 +95,14 @@ get_exts_dynamic(const cuda::std::array = 0> -TEST_DEVICE_FUNC constexpr cuda::std::array +TEST_TILE_DEVICE_FUNC constexpr cuda::std::array get_exts_dynamic(const cuda::std::array&) { return cuda::std::array{}; } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -117,7 +117,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, test_mdspan_ctor_array(handle, map, acc, exts_dynamic); } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -128,7 +128,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_ctor(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -141,7 +141,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -165,20 +165,20 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -220,7 +220,7 @@ TEST_DEVICE_FUNC void test() static_assert(!cuda::std::is_constructible_v>); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_extents.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_extents.pass.cpp index a97390f9d1d..55c00ca7903 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_extents.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_extents.pass.cpp @@ -34,7 +34,7 @@ #include "test_macros.h" template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -53,7 +53,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, test_equality_accessor(m, A{}); } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -64,7 +64,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -77,7 +77,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -102,20 +102,20 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -138,7 +138,7 @@ TEST_DEVICE_FUNC void test() static_assert(!cuda::std::is_constructible_v>); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_integers.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_integers.pass.cpp index 092e1ac031b..f9cd95e59cc 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_integers.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_integers.pass.cpp @@ -58,7 +58,7 @@ template _CCCL_CONCEPT check_mdspan_ctor_implicit = decltype(check_implicit_construction_impl(0))::value; template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&, Idxs... idxs) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&, Idxs... idxs) { using MDS = cuda::shared_memory_mdspan; @@ -80,7 +80,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, test_equality_accessor(m, A{}); } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&, Idxs... idxs) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&, Idxs... idxs) { using MDS = cuda::shared_memory_mdspan; @@ -91,7 +91,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; // construct from just dynamic extents @@ -114,7 +114,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -139,20 +139,20 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -175,7 +175,7 @@ TEST_DEVICE_FUNC void test() static_assert(!cuda::std::is_constructible_v); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map.pass.cpp index 18c14fc58c4..d32428e4586 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map.pass.cpp @@ -33,7 +33,7 @@ #include "test_macros.h" template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -51,7 +51,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, test_equality_accessor(m, A{}); } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A&) { using MDS = cuda::shared_memory_mdspan; @@ -61,7 +61,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -74,7 +74,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -82,14 +82,14 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); @@ -98,7 +98,7 @@ TEST_DEVICE_FUNC void mixin_accessor() template using mapping_t = typename cuda::std::layout_right::template mapping; -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -123,7 +123,7 @@ TEST_DEVICE_FUNC void test() static_assert(!cuda::std::is_constructible_v>>); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map_acc.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map_acc.pass.cpp index 5863f38292b..382cdbe15cb 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map_acc.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_map_acc.pass.cpp @@ -30,7 +30,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -48,7 +48,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -60,7 +60,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -68,14 +68,14 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); @@ -84,7 +84,7 @@ TEST_DEVICE_FUNC void mixin_accessor() template using mapping_t = typename cuda::std::layout_right::template mapping; -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -118,7 +118,7 @@ TEST_DEVICE_FUNC void test() !cuda::std::is_constructible_v>, acc_t>); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_span.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_span.pass.cpp index 459034bf8a4..cc721acf7a0 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_span.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.dh_span.pass.cpp @@ -42,7 +42,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr auto array_from_extents(const Extents& exts, cuda::std::index_sequence) +TEST_TILE_DEVICE_FUNC constexpr auto array_from_extents(const Extents& exts, cuda::std::index_sequence) { return cuda::std::array{exts.extent(Idxs)...}; } @@ -60,7 +60,7 @@ template _CCCL_CONCEPT check_mdspan_ctor_implicit = decltype(check_implicit_construction_impl(0))::value; template -TEST_DEVICE_FUNC constexpr void +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor_span(const H& handle, const M& map, const A&, cuda::std::span exts) { using MDS = @@ -79,7 +79,7 @@ test_mdspan_ctor_span(const H& handle, const M& map, const A&, cuda::std::span 0), int> = 0> -TEST_DEVICE_FUNC constexpr cuda::std::array +TEST_TILE_DEVICE_FUNC constexpr cuda::std::array get_exts_dynamic(const cuda::std::array& exts) { cuda::std::array exts_dynamic{}; @@ -94,14 +94,14 @@ get_exts_dynamic(const cuda::std::array = 0> -TEST_DEVICE_FUNC constexpr cuda::std::array +TEST_TILE_DEVICE_FUNC constexpr cuda::std::array get_exts_dynamic(const cuda::std::array&) { return cuda::std::array{}; } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -118,7 +118,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, handle, map, acc, cuda::std::span(exts_dynamic)); } template = 0> -TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -128,7 +128,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_ctor(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_ctor(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -141,7 +141,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { mixin_extents(handle, cuda::std::layout_left(), acc); mixin_extents(handle, cuda::std::layout_right(), acc); @@ -165,20 +165,20 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; mixin_layout(elements.data(), cuda::std::default_accessor()); } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -220,7 +220,7 @@ TEST_DEVICE_FUNC void test() static_assert(!cuda::std::is_constructible_v>); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.move.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.move.pass.cpp index 96ccd0e000f..c3d5a90801d 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.move.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/ctor.move.pass.cpp @@ -26,7 +26,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -43,7 +43,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -55,7 +55,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { // make sure we test a trivially copyable mapping static_assert(cuda::std::is_trivially_move_constructible_v< @@ -69,7 +69,7 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; // make sure we test trivially constructible accessor and data_handle @@ -80,7 +80,7 @@ TEST_DEVICE_FUNC constexpr void mixin_accessor() } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; // make sure we test trivially constructible accessor and data_handle @@ -90,7 +90,7 @@ TEST_DEVICE_FUNC void mixin_accessor() mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -98,7 +98,7 @@ TEST_DEVICE_FUNC void test() mixin_accessor(); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/index_operator.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/index_operator.pass.cpp index df9382177b9..ceb72be44a6 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/index_operator.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/index_operator.pass.cpp @@ -40,30 +40,26 @@ TEST_DIAG_SUPPRESS_GCC("-Wcomma-subscript") #endif // TEST_COMPILER(GCC, >=, 10) template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0) { return mds[i0]; } #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() -template < - class MDS, - class... Indices, - class = cuda::std::enable_if_t< - cuda::std::is_same_v()[cuda::std::declval()...]), typename MDS::reference>>, - int> -= 0 > TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs) +template + requires requires(MDS mds, Indices... indices) { mds[indices...]; } +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs) { unused(m[idxs...]); return true; } #else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv -template < - class MDS, - class Index, - class = cuda::std::enable_if_t< - cuda::std::is_same_v()[cuda::std::declval()]), typename MDS::reference>>> -TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) +template ()[cuda::std::declval()]), + typename MDS::reference>, + int> = 0> +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) { unused(m[idx]); return true; @@ -71,29 +67,29 @@ TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx) #endif // ^^^ !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ template -TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...) +TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...) { return false; } #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds) { return mds[]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1) { return mds[i0, i1]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2) { return mds[i0, i1, i2]; } template -TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3) +TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3) { return mds[i0, i1, i2, i3]; } @@ -101,7 +97,7 @@ TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t // We must ensure that we do not try to access multiarg accessors template = 0> -TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) +TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) { int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(arg))); int* ptr2 = &access(mds, arg); @@ -109,7 +105,7 @@ TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg) } template = 0> -TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) { #if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...))); @@ -121,7 +117,7 @@ TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args) } template = 0> -TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) { int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...))); assert_access(mds, args...); @@ -134,7 +130,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) } template = 0> -TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) +TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) { constexpr int r = static_cast(MDS::extents_type::rank()) - 1 - static_cast(sizeof...(Args)); for (typename MDS::index_type i = 0; i < mds.extents().extent(r); i++) @@ -144,7 +140,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args) } template -TEST_DEVICE_FUNC void test_iteration(Mapping m) +TEST_TILE_DEVICE_FUNC void test_iteration(Mapping m) { __shared__ cuda::std::array iteration_data; using MDS = cuda::shared_memory_mdspan; @@ -153,7 +149,7 @@ TEST_DEVICE_FUNC void test_iteration(Mapping m) } template -TEST_DEVICE_FUNC void test_layout() +TEST_TILE_DEVICE_FUNC void test_layout() { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_iteration(construct_mapping(Layout(), cuda::std::extents(1))); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/move.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/move.pass.cpp index 0f70c9eaca2..401e89d493c 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/move.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/move.pass.cpp @@ -26,7 +26,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc) { using MDS = cuda::shared_memory_mdspan; @@ -46,7 +46,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, } template -TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc) { [[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent; test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents()), acc); @@ -58,7 +58,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, } template -TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) +TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) { // make sure we test a trivially copyable mapping static_assert(cuda::std::is_trivially_move_assignable_v< @@ -72,7 +72,7 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc) } template , int> = 0> -TEST_DEVICE_FUNC constexpr void mixin_accessor() +TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor() { cuda::std::array elements{42}; // make sure we test trivially constructible accessor and data_handle @@ -82,7 +82,7 @@ TEST_DEVICE_FUNC constexpr void mixin_accessor() } template , int> = 0> -TEST_DEVICE_FUNC void mixin_accessor() +TEST_TILE_DEVICE_FUNC void mixin_accessor() { ElementPool elements; // make sure we test trivially constructible accessor and data_handle @@ -91,7 +91,7 @@ TEST_DEVICE_FUNC void mixin_accessor() mixin_layout(elements.get_ptr(), cuda::std::default_accessor()); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { mixin_accessor(); mixin_accessor(); @@ -99,7 +99,7 @@ TEST_DEVICE_FUNC void test() mixin_accessor(); } -TEST_DEVICE_FUNC void test_evil() +TEST_TILE_DEVICE_FUNC void test_evil() { mixin_accessor(); mixin_accessor(); diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/swap.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/swap.pass.cpp index 04592e994ba..69227a8300c 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/swap.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/shared_mem_mdspan/swap.pass.cpp @@ -27,7 +27,7 @@ #include "test_macros.h" template -TEST_DEVICE_FUNC void test_swap(MDS a, MDS b) +TEST_TILE_DEVICE_FUNC void test_swap(MDS a, MDS b) { auto org_a = a; auto org_b = b; @@ -43,7 +43,7 @@ TEST_DEVICE_FUNC void test_swap(MDS a, MDS b) test_swap_counter(); } -TEST_DEVICE_FUNC void test() +TEST_TILE_DEVICE_FUNC void test() { using extents_t = cuda::std::extents; float data_a[1024] = {}; diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp index 55d51543317..bc281176bcb 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp @@ -195,8 +195,21 @@ TEST_DEVICE_FUNC void test() } #endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_DEVICE, test();) return 0; } +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp index 34b57f4b97f..7ce0e6c0d74 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/device_only_types.pass.cpp @@ -159,8 +159,21 @@ TEST_DEVICE_FUNC void test() } #endif // _CCCL_TILE_COMPILATION() || _CCCL_DEVICE_COMPILATION() +#if _CCCL_TILE_COMPILATION() // cannot run main because its __tile_global__ +__global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_DEVICE, test();) return 0; } +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/tile_only_types.pass.cpp new file mode 100644 index 00000000000..1dac109fd0d --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/expected.void/tile_only_types.pass.cpp @@ -0,0 +1,171 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +// UNSUPPORTED: clang-14 + +#include +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using expected = cuda::std::expected; + { // default construction + expected default_constructed{}; + assert(default_constructed.has_value()); + } + + { // in_place zero initialization + expected in_place_zero_initialization{cuda::std::in_place}; + assert(in_place_zero_initialization.has_value()); + } + + { // unexpect zero initialization + expected in_place_zero_initialization{cuda::std::unexpect}; + assert(!in_place_zero_initialization.has_value()); + assert(in_place_zero_initialization.error() == 0); + } + + { // unexpect initialization + expected in_place_initialization{cuda::std::unexpect, 42}; + assert(!in_place_initialization.has_value()); + assert(in_place_initialization.error() == 42); + } + + { // unexpect initializer_list initialization + expected init_list_initialization{cuda::std::unexpect, cuda::std::initializer_list{}, 42}; + assert(!init_list_initialization.has_value()); + assert(init_list_initialization.error() == 42); + } + + { // copy construction + expected input{cuda::std::in_place}; + expected dest{input}; + assert(dest.has_value()); + } + + { // move construction + expected input{cuda::std::in_place}; + expected dest{cuda::std::move(input)}; + assert(dest.has_value()); + } + + { // assignment, value to value + expected input{cuda::std::in_place}; + expected dest{cuda::std::in_place}; + dest = input; + assert(dest.has_value()); + } + + { // assignment, value to empty + expected input{cuda::std::in_place}; + expected dest{}; + dest = input; + assert(dest.has_value()); + } + + { // assignment, empty to value + expected input{}; + expected dest{cuda::std::in_place}; + dest = input; + assert(dest.has_value()); + } + + { // assignment, empty to empty + expected input{}; + expected dest{}; + dest = input; + assert(dest.has_value()); + } + + { // assignment, error to value + expected input{cuda::std::unexpect, 42}; + expected dest{cuda::std::in_place}; + dest = input; + assert(!dest.has_value()); + assert(dest.error() == 42); + } + + { // assignment, value to error + expected input{cuda::std::in_place}; + expected dest{cuda::std::unexpect, 1337}; + dest = input; + assert(dest.has_value()); + } + + { // assignment, error to error + expected input{cuda::std::unexpect, 42}; + expected dest{cuda::std::unexpect, 1337}; + dest = input; + assert(!dest.has_value()); + assert(dest.error() == 42); + } + + { // comparison with expected with value + expected lhs{cuda::std::in_place}; + expected rhs{cuda::std::in_place}; + assert(lhs == rhs); + assert(!(lhs != rhs)); + } + + { // comparison with expected with error + expected lhs{cuda::std::unexpect, 42}; + expected rhs{cuda::std::unexpect, 1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + } + + { // comparison with type and error + expected expect{cuda::std::unexpect, 42}; + assert(expect == cuda::std::unexpected{42}); + assert(cuda::std::unexpected{42} == expect); + assert(expect != cuda::std::unexpected{1337}); + assert(cuda::std::unexpected{1337} != expect); + } + + { // swap + expected lhs{cuda::std::unexpect, 42}; + expected rhs{cuda::std::unexpect, 1337}; + lhs.swap(rhs); + assert(lhs.error() == 1337); + assert(rhs.error() == 42); + + swap(lhs, rhs); + assert(lhs.error() == 42); + assert(rhs.error() == 1337); + } + + { // swap cross error + expected lhs{cuda::std::in_place}; + expected rhs{cuda::std::unexpect, 1337}; + lhs.swap(rhs); + assert(lhs.error() == 1337); + assert(rhs.has_value()); + + swap(lhs, rhs); + assert(lhs.has_value()); + assert(rhs.error() == 1337); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/expected/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/expected/tile_only_types.pass.cpp new file mode 100644 index 00000000000..f6f98bc0d11 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/expected/tile_only_types.pass.cpp @@ -0,0 +1,207 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +// We cannot suppress execution checks in cuda::std::construct_at +// UNSUPPORTED: clang-14 + +#include +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using expected = cuda::std::expected; + { // default construction + expected default_constructed{}; + assert(default_constructed.has_value()); + assert(*default_constructed == 0); + } + + { // in_place zero initialization + expected in_place_zero_initialization{cuda::std::in_place}; + assert(in_place_zero_initialization.has_value()); + assert(*in_place_zero_initialization == 0); + } + + { // in_place initialization + expected in_place_initialization{cuda::std::in_place, 42}; + assert(in_place_initialization.has_value()); + assert(*in_place_initialization == 42); + } + + { // initializer_list initialization + expected init_list_initialization{cuda::std::in_place, cuda::std::initializer_list{}, 42}; + assert(init_list_initialization.has_value()); + assert(*init_list_initialization == 42); + } + + { // unexpect zero initialization + expected in_place_zero_initialization{cuda::std::unexpect}; + assert(!in_place_zero_initialization.has_value()); + assert(in_place_zero_initialization.error() == 0); + } + + { // unexpect initialization + expected in_place_initialization{cuda::std::unexpect, 42}; + assert(!in_place_initialization.has_value()); + assert(in_place_initialization.error() == 42); + } + + { // initializer_list initialization + expected init_list_initialization{cuda::std::unexpect, cuda::std::initializer_list{}, 42}; + assert(!init_list_initialization.has_value()); + assert(init_list_initialization.error() == 42); + } + + { // value initialization + expected value_initialization{42}; + assert(value_initialization.has_value()); + assert(*value_initialization == 42); + } + + { // copy construction + expected input{42}; + expected dest{input}; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // move construction + expected input{42}; + expected dest{cuda::std::move(input)}; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, value to value + expected input{42}; + expected dest{1337}; + dest = input; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, value to empty + expected input{42}; + expected dest{}; + dest = input; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, empty to value + expected input{}; + expected dest{1337}; + dest = input; + assert(dest.has_value()); + assert(*dest == 0); + } + + { // assignment, empty to empty + expected input{}; + expected dest{}; + dest = input; + assert(dest.has_value()); + assert(*dest == 0); + } + + { // assignment, error to value + expected input{cuda::std::unexpect, 42}; + expected dest{1337}; + dest = input; + assert(!dest.has_value()); + assert(dest.error() == 42); + } + + { // assignment, value to error + expected input{42}; + expected dest{cuda::std::unexpect, 1337}; + dest = input; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, error to error + expected input{cuda::std::unexpect, 42}; + expected dest{cuda::std::unexpect, 1337}; + dest = input; + assert(!dest.has_value()); + assert(dest.error() == 42); + } + + { // comparison with expected with value + expected lhs{42}; + expected rhs{1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + } + + { // comparison with expected with error + expected lhs{cuda::std::unexpect, 42}; + expected rhs{cuda::std::unexpect, 1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + } + + { // comparison with type and value + expected expect{42}; + assert(expect == tile_only_type{42}); + assert(tile_only_type{42} == expect); + assert(expect != tile_only_type{1337}); + assert(tile_only_type{1337} != expect); + } + + { // comparison with type and error + expected expect{cuda::std::unexpect, 42}; + assert(expect == cuda::std::unexpected{42}); + assert(cuda::std::unexpected{42} == expect); + assert(expect != cuda::std::unexpected{1337}); + assert(cuda::std::unexpected{1337} != expect); + } + + { // swap + expected lhs{42}; + expected rhs{1337}; + lhs.swap(rhs); + assert(*lhs == 1337); + assert(*rhs == 42); + + swap(lhs, rhs); + assert(*lhs == 42); + assert(*rhs == 1337); + } + + { // swap cross error + expected lhs{42}; + expected rhs{cuda::std::unexpect, 1337}; + lhs.swap(rhs); + assert(lhs.error() == 1337); + assert(*rhs == 42); + + swap(lhs, rhs); + assert(*lhs == 42); + assert(rhs.error() == 1337); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp index c0cedbcf43f..c4c9bb8a82d 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp @@ -149,6 +149,13 @@ __global__ void test_kernel() int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) return 0; } +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_DEVICE, test();) + return 0; +} +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/optional/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/optional/tile_only_types.pass.cpp new file mode 100644 index 00000000000..9cf51d3bb8e --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/optional/tile_only_types.pass.cpp @@ -0,0 +1,153 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +// We cannot suppress execution checks in cuda::std::construct_at +// UNSUPPORTED: clang-14 + +#include +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +template +TEST_TILE_FUNC void test() +{ + using optional = cuda::std::optional; + { // default construction + optional default_constructed{}; + assert(!default_constructed.has_value()); + } + + if constexpr (!cuda::std::is_reference_v) + { // in_place zero initialization + optional in_place_zero_initialization{cuda::std::in_place}; + assert(in_place_zero_initialization.has_value()); + assert(*in_place_zero_initialization == 0); + } + + cuda::std::remove_reference_t val{42}; + { // in_place initialization + optional in_place_initialization{cuda::std::in_place, val}; + assert(in_place_initialization.has_value()); + assert(*in_place_initialization == 42); + } + + { // value initialization + optional value_initialization{val}; + assert(value_initialization.has_value()); + assert(*value_initialization == 42); + } + + { // copy construction + optional input{val}; + optional dest{input}; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // move construction + optional input{val}; + optional dest{cuda::std::move(input)}; + assert(dest.has_value()); + assert(*dest == 42); + } + + cuda::std::remove_reference_t other_val{1337}; + { // assignment, value to value + optional input{val}; + optional dest{other_val}; + dest = input; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, value to empty + optional input{val}; + optional dest{}; + dest = input; + assert(dest.has_value()); + assert(*dest == 42); + } + + { // assignment, empty to value + optional input{}; + optional dest{other_val}; + dest = input; + assert(!dest.has_value()); + } + + { // assignment, empty to empty + optional input{}; + optional dest{}; + dest = input; + assert(!dest.has_value()); + } + + { // comparison with optional + optional lhs{val}; + optional rhs{other_val}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + assert(lhs < rhs); + assert(lhs <= rhs); + assert(!(lhs > rhs)); + assert(!(lhs >= rhs)); + } + + { // comparison with type + optional opt{val}; + assert(opt == tile_only_type{val}); + assert(tile_only_type{val} == opt); + assert(opt != tile_only_type{other_val}); + assert(tile_only_type{other_val} != opt); + + assert(opt < tile_only_type{other_val}); + assert(tile_only_type{7} < opt); + assert(opt <= tile_only_type{other_val}); + assert(tile_only_type{7} <= opt); + + assert(opt > tile_only_type{7}); + assert(tile_only_type{other_val} > opt); + assert(opt >= tile_only_type{7}); + assert(tile_only_type{other_val} >= opt); + } + + { // swap + optional lhs{val}; + optional rhs{other_val}; + lhs.swap(rhs); + assert(*lhs == 1337); + assert(*rhs == 42); + + swap(lhs, rhs); + assert(*lhs == 42); + assert(*rhs == 1337); + } +} + +TEST_TILE_FUNC void test() +{ + test(); + test(); +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp index 4c1d5992ad4..674a00db015 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp @@ -84,6 +84,13 @@ __global__ void test_kernel() int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) return 0; } +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_DEVICE, test();) + return 0; +} +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/tuple/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/tile_only_types.pass.cpp new file mode 100644 index 00000000000..815479c3019 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/tuple/tile_only_types.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using tuple = cuda::std::tuple; + { // default construction + tuple default_constructed{}; + assert(cuda::std::get<0>(default_constructed) == 0); + } + + { // value initialization + tuple value_initialization{tile_only_type{42}}; + assert(cuda::std::get<0>(value_initialization) == 42); + } + + { // value initialization + tuple value_initialization{42}; + assert(cuda::std::get<0>(value_initialization) == 42); + } + + { // copy construction + tuple input{42}; + tuple dest{input}; + assert(cuda::std::get<0>(dest) == 42); + } + + { // move construction + tuple input{42}; + tuple dest{cuda::std::move(input)}; + assert(cuda::std::get<0>(dest) == 42); + } + + { // assignment, value to value + tuple input{42}; + tuple dest{1337}; + dest = input; + assert(cuda::std::get<0>(dest) == 42); + } + + { // comparison with tuple + tuple lhs{42}; + tuple rhs{1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + assert(lhs < rhs); + assert(lhs <= rhs); + assert(!(lhs > rhs)); + assert(!(lhs >= rhs)); + } + + { // swap + tuple lhs{42}; + tuple rhs{1337}; + lhs.swap(rhs); + assert(cuda::std::get<0>(lhs) == 1337); + assert(cuda::std::get<0>(rhs) == 42); + + swap(lhs, rhs); + assert(cuda::std::get<0>(lhs) == 42); + assert(cuda::std::get<0>(rhs) == 1337); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp index 50a9df393ce..1c0f5758af7 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp @@ -85,6 +85,13 @@ __global__ void test_kernel() int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) return 0; } +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_DEVICE, test();) + return 0; +} +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/tile_only_types.pass.cpp new file mode 100644 index 00000000000..c294cf7f440 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/unexpected/tile_only_types.pass.cpp @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using unexpected = cuda::std::unexpected; + { // in_place zero initialization + unexpected in_place_zero_initialization{cuda::std::in_place}; + assert(in_place_zero_initialization.error() == 0); + } + + { // in_place initialization + unexpected in_place_initialization{cuda::std::in_place, 42}; + assert(in_place_initialization.error() == 42); + } + + { // value initialization + unexpected value_initialization{42}; + assert(value_initialization.error() == 42); + } + + { // initializer_list initialization + unexpected init_list_initialization{cuda::std::in_place, cuda::std::initializer_list{}, 42}; + assert(init_list_initialization.error() == 42); + } + + { // copy construction + unexpected input{42}; + unexpected dest{input}; + assert(dest.error() == 42); + } + + { // move construction + unexpected input{42}; + unexpected dest{cuda::std::move(input)}; + assert(dest.error() == 42); + } + + { // assignment + unexpected input{42}; + unexpected dest{1337}; + dest = input; + assert(dest.error() == 42); + } + + { // comparison with unexpected + unexpected lhs{42}; + unexpected rhs{1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + } + + { // swap + unexpected lhs{42}; + unexpected rhs{1337}; + lhs.swap(rhs); + assert(lhs.error() == 1337); + assert(rhs.error() == 42); + + swap(lhs, rhs); + assert(lhs.error() == 42); + assert(rhs.error() == 1337); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp index 1a6f9413d7f..7e1b0680443 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: nvrtc - #include #include @@ -96,6 +94,13 @@ __global__ void test_kernel() int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_DEVICE, test();) return 0; } +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/tile_only_types.pass.cpp new file mode 100644 index 00000000000..67f07a5f096 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/tile_only_types.pass.cpp @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using pair = cuda::std::pair; + { // default construction + pair default_constructed{}; + assert(default_constructed.first == 0); + assert(default_constructed.second == 0); + } + + { // value initialization + pair value_initialization{tile_only_type{42}, tile_only_type{1337}}; + assert(value_initialization.first == 42); + assert(value_initialization.second == 1337); + } + + { // value initialization + pair value_initialization{42, 1337}; + assert(value_initialization.first == 42); + assert(value_initialization.second == 1337); + } + + { // copy construction + pair input{42, 1337}; + pair dest{input}; + assert(dest.first == 42); + assert(dest.second == 1337); + } + + { // move construction + pair input{42, 1337}; + pair dest{cuda::std::move(input)}; + assert(dest.first == 42); + assert(dest.second == 1337); + } + + { // assignment, value to value + pair input{42, 1337}; + pair dest{1337, 42}; + dest = input; + assert(dest.first == 42); + assert(dest.second == 1337); + } + + { // comparison with pair + pair lhs{42, 1337}; + pair rhs{1337, 42}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + assert(lhs < rhs); + assert(lhs <= rhs); + assert(!(lhs > rhs)); + assert(!(lhs >= rhs)); + } + + { // swap + pair lhs{42, 1337}; + pair rhs{1337, 42}; + lhs.swap(rhs); + assert(lhs.first == 1337); + assert(lhs.second == 42); + assert(rhs.first == 42); + assert(rhs.second == 1337); + + swap(lhs, rhs); + assert(lhs.first == 42); + assert(lhs.second == 1337); + assert(rhs.first == 1337); + assert(rhs.second == 42); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp index e3444d6e965..36700a84320 100644 --- a/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp @@ -123,6 +123,13 @@ __global__ void test_kernel() int main(int arg, char** argv) { - NV_IF_TARGET(NV_IS_DEVICE, (test();)) + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) return 0; } +#else // ^^^ _CCCL_TILE_COMPILATION() ^^^ / vvv !_CCCL_TILE_COMPILATION() vvv +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_DEVICE, test();) + return 0; +} +#endif // !_CCCL_TILE_COMPILATION() diff --git a/libcudacxx/test/libcudacxx/cuda/utilities/variant/tile_only_types.pass.cpp b/libcudacxx/test/libcudacxx/cuda/utilities/variant/tile_only_types.pass.cpp new file mode 100644 index 00000000000..1d5218ca64e --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/utilities/variant/tile_only_types.pass.cpp @@ -0,0 +1,127 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: enable-tile + +#include +#include + +#include "host_device_types.h" +#include "test_macros.h" + +TEST_TILE_FUNC void test() +{ + using variant = cuda::std::variant; + { // default construction + variant default_constructed{}; + assert(cuda::std::get<0>(default_constructed) == 0); + } + + { // value initialization + variant value_initialization{tile_only_type{42}}; + assert(cuda::std::get<0>(value_initialization) == 42); + } + + { // value initialization + variant value_initialization{42}; + assert(cuda::std::get<0>(value_initialization) == 42); + } + + { // in_place_type_t initialization + variant in_place_initialization{cuda::std::in_place_type_t{}, 42}; + assert(cuda::std::get<0>(in_place_initialization) == 42); + } + + { // in_place_index_t initialization + variant in_place_initialization{cuda::std::in_place_index_t<0>{}, 42}; + assert(cuda::std::get<0>(in_place_initialization) == 42); + } + + { // in_place_type_t initializer_list initialization + variant init_list_initialization{ + cuda::std::in_place_type_t{}, cuda::std::initializer_list{}, 42}; + assert(cuda::std::get<0>(init_list_initialization) == 42); + } + + { // in_place_type_t initializer_list initialization + variant init_list_initialization{cuda::std::in_place_index_t<0>{}, cuda::std::initializer_list{}, 42}; + assert(cuda::std::get<0>(init_list_initialization) == 42); + } + + { // copy construction + variant input{42}; + variant dest{input}; + assert(cuda::std::get<0>(dest) == 42); + } + + { // move construction + variant input{42}; + variant dest{cuda::std::move(input)}; + assert(cuda::std::get<0>(dest) == 42); + } + + { // assignment, value to value + variant input{42}; + variant dest{1337}; + dest = input; + assert(cuda::std::get<0>(dest) == 42); + } + + { // emplace + variant var{42}; + var.emplace(42); + assert(cuda::std::get<0>(var) == 42); + } + + { // emplace + variant var{42}; + var.emplace<0>(42); + assert(cuda::std::get<0>(var) == 42); + } + + { // emplace init list + variant var{42}; + var.emplace(cuda::std::initializer_list{}, 42); + assert(cuda::std::get<0>(var) == 42); + } + + { // comparison with variant + variant lhs{42}; + variant rhs{1337}; + assert(!(lhs == rhs)); + assert(lhs != rhs); + assert(lhs < rhs); + assert(lhs <= rhs); + assert(!(lhs > rhs)); + assert(!(lhs >= rhs)); + } + + { // swap + variant lhs{42}; + variant rhs{1337}; + lhs.swap(rhs); + assert(cuda::std::get<0>(lhs) == 1337); + assert(cuda::std::get<0>(rhs) == 42); + + swap(lhs, rhs); + assert(cuda::std::get<0>(lhs) == 42); + assert(cuda::std::get<0>(rhs) == 1337); + } +} + +__tile_global__ void test_kernel() +{ + test(); +} + +int main(int arg, char** argv) +{ + NV_IF_TARGET(NV_IS_HOST, (test_kernel<<<1, 1>>>();)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/divides.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/divides.pass.cpp index 3c070b7bdd1..464dbb9b05c 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/divides.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/divides.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator/(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator/(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::divides f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::divides; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/minus.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/minus.pass.cpp index f2603cfe0b7..4d2f5c1136a 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/minus.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/minus.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator-(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator-(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::minus f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::minus; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/modulus.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/modulus.pass.cpp index d6e7228ed62..e781a720d61 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/modulus.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/modulus.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator%(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator%(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::modulus f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::modulus; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/multiplies.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/multiplies.pass.cpp index 32cb542060b..27b4fd5e8d8 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/multiplies.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/multiplies.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator*(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator*(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::multiplies f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::multiplies; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/negate.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/negate.pass.cpp index 36901818685..074b26dae62 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/negate.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/negate.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator-(const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator-(const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::negate f; + assert(f({})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::negate; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/plus.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/plus.pass.cpp index e884ec494dc..b2169d34bf4 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/plus.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/arithmetic.operations/plus.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator+(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator+(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::plus f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::plus; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_and.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_and.pass.cpp index e51dc89d18c..4a7ee127379 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_and.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_and.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator&(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator&(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::bit_and f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::bit_and; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp index 5e07d8a8ed7..fafff9a863c 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator~(const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator~(const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::bit_not f; + assert(f({})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::bit_not; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_or.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_or.pass.cpp index baee01a8d1e..064510cec88 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_or.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_or.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator|(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator|(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::bit_or f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::bit_or; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_xor.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_xor.pass.cpp index 56dd60b3349..f21842b29f7 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_xor.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/bitwise.operations/bit_xor.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator^(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator^(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::bit_xor f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { { diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/equal_to.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/equal_to.pass.cpp index a9452bb9c2e..657edd7cc7f 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/equal_to.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/equal_to.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator==(const with_device_op&, const with_device_op&) @@ -33,6 +33,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator==(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::equal_to f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::equal_to; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater.pass.cpp index f2a8ad13957..9419943e166 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater.pass.cpp @@ -21,7 +21,7 @@ # include "pointer_comparison_test_helper.hpp" #endif // !TEST_COMPILER(NVRTC) -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator>(const with_device_op&, const with_device_op&) @@ -36,6 +36,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator>(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::greater f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::greater; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater_equal.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater_equal.pass.cpp index e6038d51a1d..ad65605ade2 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater_equal.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/greater_equal.pass.cpp @@ -21,7 +21,7 @@ # include "pointer_comparison_test_helper.hpp" #endif // !TEST_COMPILER(NVRTC) -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator>=(const with_device_op&, const with_device_op&) @@ -36,6 +36,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator>=(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::greater_equal f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::greater_equal; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less.pass.cpp index abec60a575f..426fa5dad40 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less.pass.cpp @@ -21,7 +21,7 @@ # include "pointer_comparison_test_helper.hpp" #endif // !TEST_COMPILER(NVRTC) -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator<(const with_device_op&, const with_device_op&) @@ -36,6 +36,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator<(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::less f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::less; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less_equal.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less_equal.pass.cpp index d71d3f0d11c..d8b4a526928 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less_equal.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/less_equal.pass.cpp @@ -21,7 +21,7 @@ # include "pointer_comparison_test_helper.hpp" #endif // !TEST_COMPILER(NVRTC) -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator<=(const with_device_op&, const with_device_op&) @@ -36,6 +36,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator<=(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::less_equal f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::less_equal; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/not_equal_to.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/not_equal_to.pass.cpp index 502b8f556a2..1918879ee61 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/not_equal_to.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/comparisons/not_equal_to.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr bool operator!=(const with_device_op&, const with_device_op&) @@ -33,6 +33,23 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr bool operator!=(const with_tile_op&, const with_tile_op&) + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::not_equal_to f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::not_equal_to; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp index 04f2038615c..8ebfc9a9944 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp @@ -395,6 +395,38 @@ TEST_FUNC void noexcept_test() } } +// ensure that we allow `__device__` functions too +struct with_device_op +{ + TEST_DEVICE_FUNC constexpr bool operator()(const int) const + { + return {}; + } +}; + +__global__ void test_kernel() +{ + with_device_op op{}; + assert(cuda::std::invoke(op, 42)); +} + +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC constexpr bool operator()(const int) const + { + return {}; + } +}; + +__tile_global__ void test_tile_kernel() +{ + with_tile_op op{}; + assert(cuda::std::invoke(op, 42)); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { #if !_CCCL_TILE_COMPILATION() // error: taking address or reference of a function is unsupported in tile mode! diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_and.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_and.pass.cpp index 6f18ee85a4c..d82e38e25d7 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_and.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_and.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator&&(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator&&(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::logical_and f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::logical_and; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_not.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_not.pass.cpp index e6c92b8bece..e5afda7d003 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_not.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_not.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator!(const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator!(const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::logical_not f; + assert(f({})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::logical_not; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_or.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_or.pass.cpp index 97c9d61ee21..df01c3376d5 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_or.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/logical.operations/logical_or.pass.cpp @@ -18,7 +18,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { TEST_DEVICE_FUNC friend constexpr with_device_op operator||(const with_device_op&, const with_device_op&) @@ -37,6 +37,27 @@ __global__ void test_global_kernel() assert(f({}, {})); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + TEST_TILE_FUNC friend constexpr with_tile_op operator||(const with_tile_op&, const with_tile_op&) + { + return {}; + } + TEST_TILE_FUNC constexpr operator bool() const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::logical_or f; + assert(f({}, {})); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::logical_or; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/binary_negate.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/binary_negate.pass.cpp index 16e4e7ea88b..95488e50da5 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/binary_negate.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/binary_negate.pass.cpp @@ -20,7 +20,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { using first_argument_type = int; @@ -38,6 +38,26 @@ __global__ void test_global_kernel() assert(!f(36, 36)); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + using first_argument_type = int; + using second_argument_type = int; + using result_type = bool; + TEST_TILE_FUNC constexpr bool operator()(const int& lhs, const int& rhs) const + { + return lhs && rhs; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::binary_negate f{with_tile_op{}}; + assert(!f(36, 36)); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::binary_negate>; diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/unary_negate.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/unary_negate.pass.cpp index 2d9f6ca3eb2..9374c07c8f6 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/unary_negate.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/negators/unary_negate.pass.cpp @@ -20,7 +20,7 @@ #include "test_macros.h" -// ensure that we allow `TEST_DEVICE_FUNC` functions too +// ensure that we allow `__device__` functions too struct with_device_op { using argument_type = int; @@ -37,6 +37,25 @@ __global__ void test_global_kernel() assert(!f(36)); } +#if _CCCL_TILE_COMPILATION() +// ensure that we allow `__tile__` functions too +struct with_tile_op +{ + using argument_type = int; + using result_type = bool; + TEST_TILE_FUNC constexpr bool operator()(const int&) const + { + return true; + } +}; + +__tile_global__ void test_tile_kernel() +{ + const cuda::std::unary_negate f{with_tile_op{}}; + assert(!f(36)); +} +#endif // _CCCL_TILE_COMPILATION() + int main(int, char**) { using F = cuda::std::unary_negate>; diff --git a/libcudacxx/test/libcudacxx/std/utilities/time/time.traits/time.traits.duration_values/max.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/time/time.traits/time.traits.duration_values/max.pass.cpp index b09fb14f62a..3a667fc5037 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/time/time.traits/time.traits.duration_values/max.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/time/time.traits/time.traits.duration_values/max.pass.cpp @@ -17,9 +17,6 @@ #include "../../rep.h" #include "test_macros.h" -#ifndef TEST_DEVICE_FUNC -# error whomp whomp -#endif int main(int, char**) { diff --git a/libcudacxx/test/support/host_device_types.h b/libcudacxx/test/support/host_device_types.h index dfe3acab7ab..edcb3aaccda 100644 --- a/libcudacxx/test/support/host_device_types.h +++ b/libcudacxx/test/support/host_device_types.h @@ -132,69 +132,131 @@ struct device_only_type { int val_; - TEST_DEVICE_FUNC device_only_type(const int val = 0) noexcept + __device__ device_only_type(const int val = 0) noexcept : val_(val) {} - TEST_DEVICE_FUNC device_only_type(cuda::std::initializer_list, const int val) noexcept + __device__ device_only_type(cuda::std::initializer_list, const int val) noexcept : val_(val) {} - TEST_DEVICE_FUNC device_only_type(const device_only_type& other) noexcept + __device__ device_only_type(const device_only_type& other) noexcept : val_(other.val_) {} - TEST_DEVICE_FUNC device_only_type(device_only_type&& other) noexcept + __device__ device_only_type(device_only_type&& other) noexcept : val_(cuda::std::exchange(other.val_, -1)) {} - TEST_DEVICE_FUNC device_only_type& operator=(const device_only_type& other) noexcept + __device__ device_only_type& operator=(const device_only_type& other) noexcept { val_ = other.val_; return *this; } - TEST_DEVICE_FUNC device_only_type& operator=(device_only_type&& other) noexcept + __device__ device_only_type& operator=(device_only_type&& other) noexcept { val_ = cuda::std::exchange(other.val_, -1); return *this; } - TEST_DEVICE_FUNC ~device_only_type() noexcept {} + __device__ ~device_only_type() noexcept {} - [[nodiscard]] TEST_DEVICE_FUNC friend bool - operator==(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator==(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ == rhs.val_; } - [[nodiscard]] TEST_DEVICE_FUNC friend bool - operator!=(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator!=(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ != rhs.val_; } - [[nodiscard]] TEST_DEVICE_FUNC friend bool operator<(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator<(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ < rhs.val_; } - [[nodiscard]] TEST_DEVICE_FUNC friend bool - operator<=(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator<=(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ <= rhs.val_; } - [[nodiscard]] TEST_DEVICE_FUNC friend bool operator>(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator>(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ > rhs.val_; } - [[nodiscard]] TEST_DEVICE_FUNC friend bool - operator>=(const device_only_type& lhs, const device_only_type& rhs) noexcept + [[nodiscard]] __device__ friend bool operator>=(const device_only_type& lhs, const device_only_type& rhs) noexcept { return lhs.val_ >= rhs.val_; } - TEST_DEVICE_FUNC void swap(device_only_type& other) noexcept + __device__ void swap(device_only_type& other) noexcept { cuda::std::swap(val_, other.val_); } }; #endif // _CCCL_CUDA_COMPILATION() +#if _CCCL_TILE_COMPILATION() +struct tile_only_type +{ + int val_; + + __tile__ tile_only_type(const int val = 0) noexcept + : val_(val) + {} + __tile__ tile_only_type(cuda::std::initializer_list, const int val) noexcept + : val_(val) + {} + + __tile__ tile_only_type(const tile_only_type& other) noexcept + : val_(other.val_) + {} + __tile__ tile_only_type(tile_only_type&& other) noexcept + : val_(cuda::std::exchange(other.val_, -1)) + {} + + __tile__ tile_only_type& operator=(const tile_only_type& other) noexcept + { + val_ = other.val_; + return *this; + } + + __tile__ tile_only_type& operator=(tile_only_type&& other) noexcept + + { + val_ = cuda::std::exchange(other.val_, -1); + return *this; + } + + __tile__ ~tile_only_type() noexcept {} + + [[nodiscard]] __tile__ friend bool operator==(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ == rhs.val_; + } + [[nodiscard]] __tile__ friend bool operator!=(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ != rhs.val_; + } + [[nodiscard]] __tile__ friend bool operator<(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ < rhs.val_; + } + [[nodiscard]] __tile__ friend bool operator<=(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ <= rhs.val_; + } + [[nodiscard]] __tile__ friend bool operator>(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ > rhs.val_; + } + [[nodiscard]] __tile__ friend bool operator>=(const tile_only_type& lhs, const tile_only_type& rhs) noexcept + { + return lhs.val_ >= rhs.val_; + } + + __tile__ void swap(tile_only_type& other) noexcept + { + cuda::std::swap(val_, other.val_); + } +}; +#endif // _CCCL_TILE_COMPILATION() + #endif // TEST_SUPPORT_HOST_DEVICE_TYPES diff --git a/libcudacxx/test/support/test_macros.h b/libcudacxx/test/support/test_macros.h index 30cdf97a5f5..fbc1d098949 100644 --- a/libcudacxx/test/support/test_macros.h +++ b/libcudacxx/test/support/test_macros.h @@ -26,8 +26,10 @@ #define TEST_NV_DIAG_SUPPRESS(...) _CCCL_BEGIN_NV_DIAG_SUPPRESS(__VA_ARGS__) // Use the CCCL host device function -#define TEST_FUNC _CCCL_HOST_DEVICE _CCCL_TILE -#define TEST_DEVICE_FUNC _CCCL_DEVICE _CCCL_TILE +#define TEST_FUNC _CCCL_HOST_DEVICE _CCCL_TILE +#define TEST_DEVICE_FUNC _CCCL_DEVICE +#define TEST_TILE_FUNC _CCCL_TILE +#define TEST_TILE_DEVICE_FUNC _CCCL_TILE _CCCL_DEVICE // Use the CCCL C++ dialect detection #define TEST_STD_VER _CCCL_STD_VER