diff --git a/src/ATen/native/xpu/UpSample.h b/src/ATen/native/xpu/UpSample.h index 802992ce2d..d7d1e237ee 100644 --- a/src/ATen/native/xpu/UpSample.h +++ b/src/ATen/native/xpu/UpSample.h @@ -185,7 +185,7 @@ static inline accscalar_t area_pixel_compute_source_index( struct NearestIndexOp { int operator()(const float scale, int dst_index, int input_size) const { const int src_index = - min(static_cast(floorf((dst_index)*scale)), input_size - 1); + min(static_cast(sycl::floor((dst_index)*scale)), input_size - 1); return src_index; } }; @@ -193,7 +193,7 @@ struct NearestIndexOp { struct NearestExactIndexOp { int operator()(const float scale, int dst_index, int input_size) const { const int src_index = min( - static_cast(floorf((dst_index + static_cast(0.5)) * scale)), + static_cast(sycl::floor((dst_index + static_cast(0.5)) * scale)), input_size - 1); return src_index; } diff --git a/src/ATen/native/xpu/sycl/BinaryDivFloorKernel.cpp b/src/ATen/native/xpu/sycl/BinaryDivFloorKernel.cpp index d8a02adeed..6e27e20f7c 100644 --- a/src/ATen/native/xpu/sycl/BinaryDivFloorKernel.cpp +++ b/src/ATen/native/xpu/sycl/BinaryDivFloorKernel.cpp @@ -53,7 +53,7 @@ struct DivFloorWithScalarFunctor { scalar_t floordiv; if (div != 0) { - floordiv = std::floor(div); + floordiv = sycl::floor(div); if (div - floordiv > scalar_t(0.5)) { floordiv += scalar_t(1.0); } diff --git a/src/ATen/native/xpu/sycl/DeformConv2dKernels.cpp b/src/ATen/native/xpu/sycl/DeformConv2dKernels.cpp index bd3ed4d7aa..ed04c5209a 100644 --- a/src/ATen/native/xpu/sycl/DeformConv2dKernels.cpp +++ b/src/ATen/native/xpu/sycl/DeformConv2dKernels.cpp @@ -78,6 +78,7 @@ DISABLE_RETURN_TYPE_WARNING_BEGIN // clang-format on #include +#include #include #include #include @@ -101,8 +102,9 @@ scalar_t bilinear_interpolate( return 0; } - index_t h_low = std::floor(h); - index_t w_low = std::floor(w); + using opmath_t = at::opmath_type; + index_t h_low = sycl::floor(static_cast(h)); + index_t w_low = sycl::floor(static_cast(w)); index_t h_high = h_low + 1; index_t w_high = w_low + 1; @@ -609,8 +611,9 @@ scalar_t get_coordinate_weight( scalar_t y, scalar_t x, bool is_y_direction) { - index_t y_l = std::floor(y); - index_t x_l = std::floor(x); + using opmath_t = at::opmath_type; + index_t y_l = sycl::floor(static_cast(y)); + index_t x_l = sycl::floor(static_cast(x)); index_t y_h = y_l + 1; index_t x_h = x_l + 1; diff --git a/src/ATen/native/xpu/sycl/GridSampler.cpp b/src/ATen/native/xpu/sycl/GridSampler.cpp index 4d3d9b4233..79c3a5881b 100644 --- a/src/ATen/native/xpu/sycl/GridSampler.cpp +++ b/src/ATen/native/xpu/sycl/GridSampler.cpp @@ -56,8 +56,8 @@ struct GridSampler2dKernelFunctor { if (interpolation_mode_ == GridSamplerInterpolation::Bilinear) { // get NE, NW, SE, SW pixel values from (x, y) - index_t ix_nw = static_cast(std::floor(ix)); - index_t iy_nw = static_cast(std::floor(iy)); + index_t ix_nw = static_cast(sycl::floor(ix)); + index_t iy_nw = static_cast(sycl::floor(iy)); index_t ix_ne = ix_nw + 1; index_t iy_ne = iy_nw; index_t ix_sw = ix_nw; @@ -113,8 +113,8 @@ struct GridSampler2dKernelFunctor { ix = grid_sampler_unnormalize(x, inp_W_, align_corners_); iy = grid_sampler_unnormalize(y, inp_H_, align_corners_); - opmath_t ix_nw = std::floor(ix); - opmath_t iy_nw = std::floor(iy); + opmath_t ix_nw = sycl::floor(ix); + opmath_t iy_nw = sycl::floor(iy); const opmath_t tx = ix - ix_nw; const opmath_t ty = iy - iy_nw; @@ -370,6 +370,7 @@ Tensor grid_sampler_2d_kernel( template struct GridSampler2dBackwardKernelFunctor { + using opmath_t = at::opmath_type; void operator()(sycl::nd_item<1> item) const { auto index = item.get_global_linear_id(); if (index >= nthreads_) @@ -392,8 +393,8 @@ struct GridSampler2dBackwardKernelFunctor { if (interpolation_mode_ == GridSamplerInterpolation::Bilinear) { // get NE, NW, SE, SW pixel values from (x, y) - index_t ix_nw = static_cast(std::floor(ix)); - index_t iy_nw = static_cast(std::floor(iy)); + index_t ix_nw = static_cast(sycl::floor(static_cast(ix))); + index_t iy_nw = static_cast(sycl::floor(static_cast(iy))); index_t ix_ne = ix_nw + 1; index_t iy_ne = iy_nw; index_t ix_sw = ix_nw; @@ -530,8 +531,8 @@ struct GridSampler2dBackwardKernelFunctor { iy = grid_sampler_unnormalize_set_grad( y, inp_H_, align_corners_, &giy_mult); - scalar_t ix_nw = std::floor(ix); - scalar_t iy_nw = std::floor(iy); + scalar_t ix_nw = static_cast(sycl::floor(static_cast(ix))); + scalar_t iy_nw = static_cast(sycl::floor(static_cast(iy))); const scalar_t tx = ix - ix_nw; const scalar_t ty = iy - iy_nw; @@ -882,9 +883,9 @@ struct GridSampler3dKernelFunctor { // get corner pixel values from (x, y, z) // for 4d, we used north-east-south-west // for 5d, we add top-bottom - index_t ix_tnw = static_cast(std::floor(ix)); - index_t iy_tnw = static_cast(std::floor(iy)); - index_t iz_tnw = static_cast(std::floor(iz)); + index_t ix_tnw = static_cast(sycl::floor(ix)); + index_t iy_tnw = static_cast(sycl::floor(iy)); + index_t iz_tnw = static_cast(sycl::floor(iz)); index_t ix_tne = ix_tnw + 1; index_t iy_tne = iy_tnw; @@ -1227,6 +1228,7 @@ Tensor grid_sampler_3d_kernel( template struct GridSampler3dBackwardKernelFunctor { + using opmath_t = at::opmath_type; void operator()(sycl::nd_item<1> item) const { auto index = item.get_global_linear_id(); if (index >= nthreads_) @@ -1257,9 +1259,9 @@ struct GridSampler3dBackwardKernelFunctor { // get corner pixel values from (x, y, z) // for 4d, we used north-east-south-west // for 5d, we add top-bottom - index_t ix_tnw = static_cast(std::floor(ix)); - index_t iy_tnw = static_cast(std::floor(iy)); - index_t iz_tnw = static_cast(std::floor(iz)); + index_t ix_tnw = static_cast(sycl::floor(static_cast(ix))); + index_t iy_tnw = static_cast(sycl::floor(static_cast(iy))); + index_t iz_tnw = static_cast(sycl::floor(static_cast(iz))); index_t ix_tne = ix_tnw + 1; index_t iy_tne = iy_tnw; diff --git a/src/ATen/native/xpu/sycl/Philox4x32.h b/src/ATen/native/xpu/sycl/Philox4x32.h index 9a20c5d108..f2627c90d6 100644 --- a/src/ATen/native/xpu/sycl/Philox4x32.h +++ b/src/ATen/native/xpu/sycl/Philox4x32.h @@ -569,7 +569,7 @@ static inline unsigned int rand_poisson_gammainc( while (true) { y = rand_uniform(state); x = pgammaincinv(lambda, y); - x = floorf(x); + x = sycl::floor(x); z = rand_uniform(state); v = (pgammainc(lambda, x + 1.0f) - pgammainc(lambda, x)) * 1.3f; z = z * v; diff --git a/src/ATen/native/xpu/sycl/PsRoiPoolKernels.cpp b/src/ATen/native/xpu/sycl/PsRoiPoolKernels.cpp index 7f572baf83..f02bfd1f7b 100644 --- a/src/ATen/native/xpu/sycl/PsRoiPoolKernels.cpp +++ b/src/ATen/native/xpu/sycl/PsRoiPoolKernels.cpp @@ -17,6 +17,7 @@ DISABLE_RETURN_TYPE_WARNING_BEGIN // clang-format on #include +#include #include #include #include @@ -52,11 +53,13 @@ struct PsRoiPoolForwardKernel { T bin_size_h = static_cast(roi_height) / static_cast(pooled_height_); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width_); + + using opmath_t = at::opmath_type; int hstart = - static_cast(std::floor(static_cast(ph) * bin_size_h)); + static_cast(sycl::floor(static_cast(static_cast(ph) * bin_size_h))); int wstart = - static_cast(std::floor(static_cast(pw) * bin_size_w)); + static_cast(sycl::floor(static_cast(static_cast(pw) * bin_size_w))); int hend = static_cast(std::ceil(static_cast(ph + 1) * bin_size_h)); int wend = @@ -204,11 +207,13 @@ struct PsRoiPoolBackwardKernel { T bin_size_h = static_cast(roi_height) / static_cast(pooled_height_); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width_); + + using opmath_t = at::opmath_type; int hstart = - static_cast(std::floor(static_cast(ph) * bin_size_h)); + static_cast(sycl::floor(static_cast(static_cast(ph) * bin_size_h))); int wstart = - static_cast(std::floor(static_cast(pw) * bin_size_w)); + static_cast(sycl::floor(static_cast(static_cast(pw) * bin_size_w))); int hend = static_cast(std::ceil(static_cast(ph + 1) * bin_size_h)); int wend = diff --git a/src/ATen/native/xpu/sycl/RoiPoolKernels.cpp b/src/ATen/native/xpu/sycl/RoiPoolKernels.cpp index 6c055add3d..9feed1d652 100644 --- a/src/ATen/native/xpu/sycl/RoiPoolKernels.cpp +++ b/src/ATen/native/xpu/sycl/RoiPoolKernels.cpp @@ -17,6 +17,7 @@ DISABLE_RETURN_TYPE_WARNING_BEGIN // clang-format on #include +#include #include #include #include @@ -48,11 +49,13 @@ struct RoiPoolForwardKernel { T bin_size_h = static_cast(roi_height) / static_cast(pooled_height_); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width_); + + using opmath_t = at::opmath_type; int hstart = - static_cast(std::floor(static_cast(ph) * bin_size_h)); + static_cast(sycl::floor(static_cast(static_cast(ph) * bin_size_h))); int wstart = - static_cast(std::floor(static_cast(pw) * bin_size_w)); + static_cast(sycl::floor(static_cast(static_cast(pw) * bin_size_w))); int hend = static_cast(std::ceil(static_cast(ph + 1) * bin_size_h)); int wend = diff --git a/src/ATen/native/xpu/sycl/UnaryFractionKernels.cpp b/src/ATen/native/xpu/sycl/UnaryFractionKernels.cpp index 778ef97ffa..e9d8fa09ed 100644 --- a/src/ATen/native/xpu/sycl/UnaryFractionKernels.cpp +++ b/src/ATen/native/xpu/sycl/UnaryFractionKernels.cpp @@ -9,6 +9,7 @@ */ #include +#include #include #include @@ -172,7 +173,8 @@ void round_decimals_kernel(TensorIteratorBase& iter, int64_t decimals) { template struct FloorFunctor { scalar_t operator()(scalar_t a) const { - return std::floor(a); + using opmath_t = at::opmath_type; + return sycl::floor(static_cast(a)); } }; diff --git a/src/ATen/native/xpu/sycl/UpSampleBicubic2dKernels.cpp b/src/ATen/native/xpu/sycl/UpSampleBicubic2dKernels.cpp index 93fcb4e9b2..74dff96e33 100644 --- a/src/ATen/native/xpu/sycl/UpSampleBicubic2dKernels.cpp +++ b/src/ATen/native/xpu/sycl/UpSampleBicubic2dKernels.cpp @@ -53,12 +53,12 @@ struct UpsampleBicubic2dKernelFunctor { // Interpolation kernel accscalar_t real_x = area_pixel_compute_source_index( width_scale_, output_x, align_corners_, /*cubic=*/true); - int in_x = floorf(real_x); + int in_x = sycl::floor(real_x); accscalar_t t_x = real_x - in_x; accscalar_t real_y = area_pixel_compute_source_index( height_scale_, output_y, align_corners_, /*cubic=*/true); - int in_y = floorf(real_y); + int in_y = sycl::floor(real_y); accscalar_t t_y = real_y - in_y; for (int n = 0; n < nbatch; n++) { for (int c = 0; c < channels; c++) { @@ -223,12 +223,12 @@ struct UpsampleBicubic2dBackwardKernelFunctor { accscalar_t real_x = area_pixel_compute_source_index( width_scale_, output_x, align_corners_, /*cubic=*/true); - int input_x = floorf(real_x); + int input_x = sycl::floor(real_x); accscalar_t t_x = real_x - input_x; accscalar_t real_y = area_pixel_compute_source_index( height_scale_, output_y, align_corners_, /*cubic=*/true); - int input_y = floorf(real_y); + int input_y = sycl::floor(real_y); accscalar_t t_y = real_y - input_y; accscalar_t x_coeffs[4];