diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ad41245..03ab40a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,9 +22,9 @@ if(CMAKE_CUDA_ARCHITECTURES) endif() project(cvcuda - LANGUAGES C CXX - VERSION 0.16.0 - DESCRIPTION "CUDA-accelerated Computer Vision algorithms" + LANGUAGES C CXX + VERSION 0.15.0 + DESCRIPTION "CUDA-accelerated Computer Vision algorithms" ) # Make sure the cuda host compiler agrees with what we're using, diff --git a/src/cvcuda/priv/OpResize.cu b/src/cvcuda/priv/OpResize.cu index 448ff958..e29919b0 100644 --- a/src/cvcuda/priv/OpResize.cu +++ b/src/cvcuda/priv/OpResize.cu @@ -354,16 +354,14 @@ __global__ void CubicResize(SrcWrapper src, DstWrapper dst, int2 srcSize, int2 d if (dstCoord.y < dstSize.y && dstCoord.x < dstSize.x) { - float2 srcCoord = (cuda::DropCast<2>(dstCoord) + .5f) * scaleRatio - .5f; - int3 iSrcCoord{(int)floor(srcCoord.x), (int)floor(srcCoord.y), dstCoord.z}; - - float fx = srcCoord.x - iSrcCoord.x; - float fy = srcCoord.y - iSrcCoord.y; + const float2 srcCoord = (cuda::DropCast<2>(dstCoord) + .5f) * scaleRatio - .5f; + int3 baseCoord{(int)floor(srcCoord.x), (int)floor(srcCoord.y), dstCoord.z}; - fx = (iSrcCoord.x < 1 || iSrcCoord.x >= srcSize.x - 3) ? 
0 : fx; + const float fx = srcCoord.x - baseCoord.x; + const float fy = srcCoord.y - baseCoord.y; - iSrcCoord.y = cuda::max(1, cuda::min(iSrcCoord.y, srcSize.y - 3)); - iSrcCoord.x = cuda::max(1, cuda::min(iSrcCoord.x, srcSize.x - 3)); + const int xMax = srcSize.x - 1; + const int yMax = srcSize.y - 1; float wx[4]; float wy[4]; @@ -376,10 +374,13 @@ __global__ void CubicResize(SrcWrapper src, DstWrapper dst, int2 srcSize, int2 d #pragma unroll for (int cy = -1; cy <= 2; cy++) { + const int sy = cuda::min(cuda::max(baseCoord.y + cy, 0), yMax); #pragma unroll for (int cx = -1; cx <= 2; cx++) { - sum += src[int3{iSrcCoord.x + cx, iSrcCoord.y + cy, iSrcCoord.z}] * (wx[cx + 1] * wy[cy + 1]); + const int sx = cuda::min(cuda::max(baseCoord.x + cx, 0), xMax); + + sum += src[int3{sx, sy, baseCoord.z}] * (wx[cx + 1] * wy[cy + 1]); } } diff --git a/src/cvcuda/priv/legacy/random_resized_crop.cu b/src/cvcuda/priv/legacy/random_resized_crop.cu index 053fe16c..735997a0 100644 --- a/src/cvcuda/priv/legacy/random_resized_crop.cu +++ b/src/cvcuda/priv/legacy/random_resized_crop.cu @@ -181,7 +181,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, int2 srcSi float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f + top); int sy = cuda::round(fy); fy -= sy; - sy = cuda::max(1, cuda::min(sy, height - 3)); + const int syClamped = cuda::max(1, cuda::min(sy, height - 3)); + fy += static_cast<float>(sy - syClamped); + sy = syClamped; const float A = -0.75f; @@ -196,8 +198,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, int2 srcSi float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f + left); int sx = cuda::round(fx); fx -= sx; - fx *= ((sx >= 1) && (sx < width - 3)); - sx = cuda::max(1, cuda::min(sx, width - 3)); + const int sxClamped = cuda::max(1, cuda::min(sx, width - 3)); + fx += static_cast<float>(sx - sxClamped); + sx = sxClamped; float cX[4]; cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A; diff --git 
a/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu b/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu index 79f135f4..8ce5fd58 100644 --- a/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu +++ b/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu @@ -193,7 +193,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, const int float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f + top); int sy = cuda::round(fy); fy -= sy; - sy = cuda::max(1, cuda::min(sy, height - 3)); + const int syClamped = cuda::max(1, cuda::min(sy, height - 3)); + fy += static_cast<float>(sy - syClamped); + sy = syClamped; const float A = -0.75f; @@ -208,8 +210,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, const int float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f + left); int sx = cuda::round(fx); fx -= sx; - fx *= ((sx >= 1) && (sx < width - 3)); - sx = cuda::max(1, cuda::min(sx, width - 3)); + const int sxClamped = cuda::max(1, cuda::min(sx, width - 3)); + fx += static_cast<float>(sx - sxClamped); + sx = sxClamped; float cX[4]; cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A; diff --git a/src/cvcuda/priv/legacy/resize_var_shape.cu b/src/cvcuda/priv/legacy/resize_var_shape.cu index e7342bb1..c7561ade 100644 --- a/src/cvcuda/priv/legacy/resize_var_shape.cu +++ b/src/cvcuda/priv/legacy/resize_var_shape.cu @@ -188,7 +188,9 @@ __global__ void resize_bicubic(cuda::ImageBatchVarShapeWrap src, cuda:: float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f); int sy = cuda::round(fy); fy -= sy; - sy = cuda::max(1, cuda::min(sy, height - 3)); + const int syClamped = cuda::max(1, cuda::min(sy, height - 3)); + fy += static_cast<float>(sy - syClamped); // rebase fractional offset after clamp + sy = syClamped; const float A = -0.75f; @@ -203,8 +205,9 @@ __global__ void resize_bicubic(cuda::ImageBatchVarShapeWrap src, cuda:: float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f); int sx = cuda::round(fx); fx -= sx; - fx *= 
((sx >= 1) && (sx < width - 3)); - sx = cuda::max(1, cuda::min(sx, width - 3)); + const int sxClamped = cuda::max(1, cuda::min(sx, width - 3)); + fx += static_cast<float>(sx - sxClamped); + sx = sxClamped; float cX[4]; cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A;