Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/__memory/align_down.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
_CCCL_BEGIN_NAMESPACE_CUDA

template <typename _Tp>
[[nodiscard]] _CCCL_API _Tp* align_down(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API _Tp* align_down(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept
{
using ::cuda::std::uintptr_t;
_CCCL_ASSERT(::cuda::__is_valid_alignment<_Tp>(__alignment), "invalid alignment");
Expand Down
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/__memory/align_up.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
_CCCL_BEGIN_NAMESPACE_CUDA

template <typename _Tp>
[[nodiscard]] _CCCL_API inline _Tp* align_up(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API inline _Tp* align_up(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept
{
using ::cuda::std::uintptr_t;
_CCCL_ASSERT(::cuda::__is_valid_alignment<_Tp>(__alignment), "invalid alignment");
Expand Down
8 changes: 4 additions & 4 deletions libcudacxx/include/cuda/__memory/ptr_rebind.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
_CCCL_BEGIN_NAMESPACE_CUDA

template <typename _Up, typename _Tp>
[[nodiscard]] _CCCL_API _Up* ptr_rebind(_Tp* __ptr) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API _Up* ptr_rebind(_Tp* __ptr) noexcept
{
if constexpr (::cuda::std::is_same_v<_Up, _Tp>) // also handle _Tp == _Up == void
{
Expand All @@ -51,19 +51,19 @@ template <typename _Up, typename _Tp>
}

//! @brief Overload of \c ptr_rebind for pointers to const-qualified objects.
//!
//! Strips the \c const qualifier from the pointee with \c const_cast and forwards to
//! \c ptr_rebind<const _Up>, so the returned pointer is const-qualified again.
template <typename _Up, typename _Tp>
[[nodiscard]] _CCCL_API const _Up* ptr_rebind(const _Tp* __ptr) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API const _Up* ptr_rebind(const _Tp* __ptr) noexcept
{
return ::cuda::ptr_rebind<const _Up>(const_cast<_Tp*>(__ptr));
}

//! @brief Overload of \c ptr_rebind for pointers to volatile-qualified objects.
//!
//! Strips the \c volatile qualifier from the pointee with \c const_cast and forwards to
//! \c ptr_rebind<volatile _Up>, so the returned pointer is volatile-qualified again.
template <typename _Up, typename _Tp>
[[nodiscard]] _CCCL_API volatile _Up* ptr_rebind(volatile _Tp* __ptr) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API volatile _Up* ptr_rebind(volatile _Tp* __ptr) noexcept
{
return ::cuda::ptr_rebind<volatile _Up>(const_cast<_Tp*>(__ptr));
}

//! @brief Overload of \c ptr_rebind for pointers to const volatile-qualified objects.
//!
//! Strips both cv-qualifiers from the pointee with \c const_cast and forwards to
//! \c ptr_rebind<const volatile _Up>, so the returned pointer carries both qualifiers again.
template <typename _Up, typename _Tp>
[[nodiscard]] _CCCL_API const volatile _Up* ptr_rebind(const volatile _Tp* __ptr) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API const volatile _Up* ptr_rebind(const volatile _Tp* __ptr) noexcept
{
return ::cuda::ptr_rebind<const volatile _Up>(const_cast<_Tp*>(__ptr));
}
Expand Down
12 changes: 6 additions & 6 deletions libcudacxx/include/cuda/std/__cccl/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,6 @@
# define _CCCL_CUDA_COMPILATION() 0
#endif // ^^^ not compiling .cu file ^^^

// _CCCL_TILE_COMPILATION() expands to 1 when __CUDACC_TILE__ is defined and to 0 otherwise
#ifdef __CUDACC_TILE__
# define _CCCL_TILE_COMPILATION() 1
#else // ^^^ compiling in tile mode ^^^ / vvv not compiling in tile mode vvv
# define _CCCL_TILE_COMPILATION() 0
#endif // ^^^ not compiling in tile mode ^^^

// The CUDA compiler version shares the implementation with the C++ compiler
#define _CCCL_CUDA_COMPILER_MAKE_VERSION(_MAJOR, _MINOR) _CCCL_COMPILER_MAKE_VERSION(_MAJOR, _MINOR)
#define _CCCL_CUDA_COMPILER(...) _CCCL_VERSION_COMPARE(_CCCL_CUDA_COMPILER_, _CCCL_CUDA_COMPILER_##__VA_ARGS__)
Expand Down Expand Up @@ -157,6 +151,12 @@
# define _CCCL_DEVICE_COMPILATION() 0
#endif // ^^^ not compiling device code ^^^

// _CCCL_TILE_COMPILATION() expands to 1 only when __CUDACC_TILE__ is defined and the
// _CCCL_CUDA_COMPILER(NVCC, >, 13, 3) check passes; in every other configuration it expands to 0
#if defined(__CUDACC_TILE__) && _CCCL_CUDA_COMPILER(NVCC, >, 13, 3)
# define _CCCL_TILE_COMPILATION() 1
#else // ^^^ compiling in tile mode with NVCC > 13.3 ^^^ / vvv not compiling in tile mode vvv
# define _CCCL_TILE_COMPILATION() 0
#endif // ^^^ not compiling in tile mode ^^^

// Encodes a (major, minor) version pair as a single integer: MAJOR * 1000 + MINOR * 10, e.g. (12, 4) -> 12040
#define _CCCL_CUDACC_MAKE_VERSION(_MAJOR, _MINOR) ((_MAJOR) * 1000 + (_MINOR) * 10)

// clang-cuda does not define __CUDACC_VER_MAJOR__ and friends. They are instead retrieved from the CUDA_VERSION macro
Expand Down
2 changes: 2 additions & 0 deletions libcudacxx/include/cuda/std/__cccl/visibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,13 @@
# define _CCCL_HOST_DEVICE_API _CCCL_HOST_DEVICE
# define _CCCL_HOST_API _CCCL_HOST
# define _CCCL_DEVICE_API _CCCL_DEVICE
# define _CCCL_TILE_API _CCCL_TILE
#else // ^^^ _CCCL_COMPILER(NVHPC) ^^^ / vvv !_CCCL_COMPILER(NVHPC) vvv
# define _CCCL_API _CCCL_TILE _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# define _CCCL_HOST_DEVICE_API _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# define _CCCL_HOST_API _CCCL_HOST _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# define _CCCL_DEVICE_API _CCCL_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# define _CCCL_TILE_API _CCCL_TILE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
#endif // !_CCCL_COMPILER(NVHPC)

//! @brief \c _CCCL_NODEBUG_API marks a function's visibility as hidden and causes
Expand Down
5 changes: 3 additions & 2 deletions libcudacxx/include/cuda/std/__mdspan/aligned_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,13 @@ class aligned_accessor
return {};
}

//! @brief Returns a reference to the element at index \c __i relative to \c __p.
//!
//! The handle is first passed through \c assume_aligned<byte_alignment> and then subscripted.
_CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept
_CCCL_HOST_DEVICE_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept
{
return ::cuda::std::assume_aligned<byte_alignment>(__p)[__i];
}

//! @brief Returns the data handle advanced by \c __i elements.
//!
//! The handle is first passed through \c assume_aligned<byte_alignment>; the result is returned as
//! \c offset_policy::data_handle_type.
_CCCL_API constexpr typename offset_policy::data_handle_type offset(data_handle_type __p, size_t __i) const noexcept
_CCCL_HOST_DEVICE_API constexpr typename offset_policy::data_handle_type
offset(data_handle_type __p, size_t __i) const noexcept
{
return ::cuda::std::assume_aligned<byte_alignment>(__p) + __i;
}
Expand Down
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__memory/align.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ _CCCL_DIAG_SUPPRESS_MSVC(4146) // unary minus operator applied to unsigned type,

_CCCL_BEGIN_NAMESPACE_CUDA_STD

_CCCL_API inline void* align(size_t __alignment, size_t __size, void*& __ptr, size_t& __space)
_CCCL_HOST_DEVICE_API inline void* align(size_t __alignment, size_t __size, void*& __ptr, size_t& __space)
{
_CCCL_ASSERT(::cuda::__is_valid_alignment(__alignment), "cuda::std::align: invalid alignment");
if (__space < __size)
Expand Down
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__memory/assume_aligned.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
_CCCL_BEGIN_NAMESPACE_CUDA_STD

template <size_t _Align, class _Tp>
[[nodiscard]] _CCCL_API constexpr _Tp* assume_aligned(_Tp* __ptr) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API constexpr _Tp* assume_aligned(_Tp* __ptr) noexcept
{
static_assert(::cuda::__is_valid_alignment<_Tp>(_Align), "invalid _Align value for _Tp");
#if !defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
_CCCL_BEGIN_NAMESPACE_CUDA_STD

template <typename _Tp>
[[nodiscard]] _CCCL_API _Tp* __runtime_assume_aligned(_Tp* __ptr, ::cuda::std::size_t __alignment) noexcept
[[nodiscard]] _CCCL_HOST_DEVICE_API _Tp*
__runtime_assume_aligned(_Tp* __ptr, [[maybe_unused]] ::cuda::std::size_t __alignment) noexcept
{
#if defined(_CCCL_BUILTIN_ASSUME_ALIGNED)
using _Up = remove_volatile_t<_Tp>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,76 +40,72 @@ TEST_DIAG_SUPPRESS_GCC("-Wcomma-subscript")
#endif // TEST_COMPILER(GCC, >=, 10)

// Single-index access helper: returns a reference to mds[i0]
template <class MDS>
TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0)
TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0)
{
return mds[i0];
}

// Constrained overloads of check_operator_constraints: they only participate in overload resolution when
// MDS can be subscripted with the supplied index argument(s). They evaluate the subscript and return true.
// With multi-argument operator[] support any number of indices is accepted, otherwise exactly one.
#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>,
int> = 0>
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
// evaluate the subscript expression; its result is passed to unused() and otherwise discarded
unused(m[idxs...]);
return true;
}
#else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv
template <
class MDS,
class Index,
class = cuda::std::enable_if_t<cuda::std::is_same<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>::value>>
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx)
template <class MDS,
class Index,
cuda::std::enable_if_t<cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>,
int> = 0>
TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx)
{
// evaluate the subscript expression; its result is passed to unused() and otherwise discarded
unused(m[idx]);
return true;
}
#endif // !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()

// Unconstrained catch-all overload: accepts any mdspan type and any index arguments and returns false.
// NOTE(review): presumably this is the overload picked when the constrained one is not viable - confirm.
template <class MDS, class... Indices>
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...)
TEST_TILE_DEVICE_FUNC constexpr bool check_operator_constraints(MDS, Indices...)
{
return false;
}

// access overloads for zero, two, three and four indices. Each returns a reference to the element obtained
// through the multi-argument subscript operator, so they are only compiled when
// _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() is true.
#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
// no index: mds[]
template <class MDS>
TEST_DEVICE_FUNC constexpr auto& access(MDS mds)
TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds)
{
return mds[];
}
// two indices: mds[i0, i1]
template <class MDS>
TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1)
TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1)
{
return mds[i0, i1];
}
// three indices: mds[i0, i1, i2]
template <class MDS>
TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2)
TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2)
{
return mds[i0, i1, i2];
}
// four indices: mds[i0, i1, i2, i3]
template <class MDS>
TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3)
TEST_TILE_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0, int64_t i1, int64_t i2, int64_t i3)
{
return mds[i0, i1, i2, i3];
}
#endif // _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()

// We must ensure that we do not try to access multiarg accessors
// Rank-1 overload (enabled only when MDS::extents_type::rank() == 1): asserts that the element address
// obtained via accessor().access(data_handle(), mapping()(arg)) equals the address of access(mds, arg).
template <class MDS, class Arg, cuda::std::enable_if_t<(MDS::extents_type::rank() == 1), int> = 0>
TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg)
TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Arg arg)
{
// address computed by hand from accessor, data handle and mapping
int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(arg)));
// address computed through the mdspan subscript operator
int* ptr2 = &access(mds, arg);
assert(ptr1 == ptr2);
}

template <class MDS, class... Args, cuda::std::enable_if_t<(MDS::extents_type::rank() == sizeof...(Args)), int> = 0>
TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args)
TEST_TILE_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args)
{
#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...)));
Expand All @@ -121,7 +117,7 @@ TEST_DEVICE_FUNC constexpr void assert_access(MDS mds, Args... args)
}

template <class MDS, class... Args, cuda::std::enable_if_t<(MDS::extents_type::rank() == sizeof...(Args)), int> = 0>
TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
{
int* ptr1 = &(mds.accessor().access(mds.data_handle(), mds.mapping()(args...)));
assert_access(mds, args...);
Expand All @@ -134,7 +130,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
}

template <class MDS, class... Args, cuda::std::enable_if_t<(MDS::extents_type::rank() != sizeof...(Args)), int> = 0>
TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
TEST_TILE_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
{
constexpr int r = static_cast<int>(MDS::extents_type::rank()) - 1 - static_cast<int>(sizeof...(Args));
for (typename MDS::index_type i = 0; i < mds.extents().extent(r); i++)
Expand All @@ -144,7 +140,7 @@ TEST_DEVICE_FUNC constexpr void iterate(MDS mds, Args... args)
}

template <class Mapping>
TEST_DEVICE_FUNC constexpr void test_iteration(Mapping m)
TEST_TILE_DEVICE_FUNC constexpr void test_iteration(Mapping m)
{
cuda::std::array<int, 1024> data{};
using MDS = cuda::device_mdspan<int, typename Mapping::extents_type, typename Mapping::layout_type>;
Expand All @@ -153,7 +149,7 @@ TEST_DEVICE_FUNC constexpr void test_iteration(Mapping m)
}

template <class Layout>
TEST_DEVICE_FUNC constexpr void test_layout()
TEST_TILE_DEVICE_FUNC constexpr void test_layout()
{
[[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent;
test_iteration(construct_mapping(Layout(), cuda::std::extents<unsigned, D>(1)));
Expand Down Expand Up @@ -300,7 +296,7 @@ TEST_DEVICE_FUNC constexpr void test_layout()
}

template <class Layout>
TEST_DEVICE_FUNC constexpr void test_layout_large()
TEST_TILE_DEVICE_FUNC constexpr void test_layout_large()
{
[[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent;
test_iteration(construct_mapping(Layout(), cuda::std::extents<int64_t, D, 4, D, D>(3, 5, 6)));
Expand All @@ -309,17 +305,17 @@ TEST_DEVICE_FUNC constexpr void test_layout_large()

// mdspan::operator[] casts to index_type before calling the mapping.
// The mapping requirements only require the index operator to accept mixed integer types, not anything
// convertible to index_type.
// The function body is intentionally empty: nothing is checked at runtime here.
TEST_DEVICE_FUNC constexpr void test_index_cast_happens() {}
TEST_TILE_DEVICE_FUNC constexpr void test_index_cast_happens() {}

// Runs test_layout for layout_left, layout_right and layout_wrapping_integral<4>; always returns true.
TEST_DEVICE_FUNC constexpr bool test()
TEST_TILE_DEVICE_FUNC constexpr bool test()
{
test_layout<cuda::std::layout_left>();
test_layout<cuda::std::layout_right>();
test_layout<layout_wrapping_integral<4>>();
return true;
}

// Non-constexpr entry point that calls test() and discards its result.
TEST_DEVICE_FUNC void test_device()
TEST_TILE_DEVICE_FUNC void test_device()
{
test();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "test_macros.h"

template <class H, class M, class A>
TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc)
TEST_TILE_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map, const A& acc)
{
using MDS =
cuda::shared_memory_mdspan<typename A::element_type, typename M::extents_type, typename M::layout_type, A>;
Expand All @@ -45,7 +45,7 @@ TEST_DEVICE_FUNC constexpr void test_mdspan_types(const H& handle, const M& map,
}

template <class H, class L, class A>
TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc)
TEST_TILE_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout, const A& acc)
{
[[maybe_unused]] constexpr size_t D = cuda::std::dynamic_extent;
test_mdspan_types(handle, construct_mapping(layout, cuda::std::extents<int>()), acc);
Expand All @@ -57,7 +57,7 @@ TEST_DEVICE_FUNC constexpr void mixin_extents(const H& handle, const L& layout,
}

template <class H, class A>
TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc)
TEST_TILE_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc)
{
// make sure we test a trivially assignable mapping
static_assert(cuda::std::is_trivially_assignable_v<
Expand All @@ -73,7 +73,7 @@ TEST_DEVICE_FUNC constexpr void mixin_layout(const H& handle, const A& acc)
}

template <class T, cuda::std::enable_if_t<cuda::std::is_default_constructible_v<T>, int> = 0>
TEST_DEVICE_FUNC constexpr void mixin_accessor()
TEST_TILE_DEVICE_FUNC constexpr void mixin_accessor()
{
cuda::std::array<T, 1024> elements{42};
// make sure we test trivially constructible accessor and data_handle
Expand All @@ -83,7 +83,7 @@ TEST_DEVICE_FUNC constexpr void mixin_accessor()
}

template <class T, cuda::std::enable_if_t<!cuda::std::is_default_constructible_v<T>, int> = 0>
TEST_DEVICE_FUNC void mixin_accessor()
TEST_TILE_DEVICE_FUNC void mixin_accessor()
{
ElementPool<T, 1024> elements;
// make sure we test trivially constructible accessor and data_handle
Expand All @@ -92,15 +92,15 @@ TEST_DEVICE_FUNC void mixin_accessor()
mixin_layout(elements.get_ptr(), cuda::std::default_accessor<T>());
}

// Runs mixin_accessor for the element types int, const int, double and const double.
TEST_DEVICE_FUNC void test()
TEST_TILE_DEVICE_FUNC void test()
{
mixin_accessor<int>();
mixin_accessor<const int>();
mixin_accessor<double>();
mixin_accessor<const double>();
}

TEST_DEVICE_FUNC void test_evil()
TEST_TILE_DEVICE_FUNC void test_evil()
{
mixin_accessor<MinimalElementType>();
mixin_accessor<const MinimalElementType>();
Expand Down
Loading
Loading