Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions cuda_rasterizer/auxiliary.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,11 @@ __forceinline__ __device__ bool in_frustum(int idx,
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
#if defined(__HIPCC__)
__builtin_trap();
#else
__trap();
#endif
}
return false;
}
Expand Down Expand Up @@ -255,7 +259,11 @@ __forceinline__ __device__ bool in_frustum_triangle(int idx,
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
#if defined(__HIPCC__)
__builtin_trap();
#else
__trap();
#endif
}
return false;
}
Expand Down
6 changes: 4 additions & 2 deletions cuda_rasterizer/backward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
#include "backward.h"
#include "auxiliary.h"
#include <cooperative_groups.h>
#ifndef __HIPCC__
#include <cooperative_groups/reduce.h>
#endif
namespace cg = cooperative_groups;


Expand Down Expand Up @@ -762,7 +764,7 @@
// Propagate gradients for remaining steps: finish 3D mean gradients,
// propagate color gradients to SH (if desired), propagate 3D covariance
// matrix gradients to scale and rotation.
preprocessCUDA<NUM_CHANNELS> << < (P + 255) / 256, 256 >> > (
preprocessCUDA<NUM_CHANNELS> <<<(P + 255) / 256, 256>>> (
P, D, M,
triangles_points,
W, H,
Expand Down Expand Up @@ -820,7 +822,7 @@
float* dL_dcolors,
float* dL_dsigma_factor)
{
renderCUDA<NUM_CHANNELS> << <grid, block >> >(
renderCUDA<NUM_CHANNELS><<<grid, block>>>(
ranges,
point_list,
W, H,
Expand Down
2 changes: 2 additions & 0 deletions cuda_rasterizer/backward.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@

#include <cuda.h>
#include "cuda_runtime.h"
#ifndef __HIPCC__
#include "device_launch_parameters.h"
#endif
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

Expand Down
6 changes: 4 additions & 2 deletions cuda_rasterizer/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
#include "forward.h"
#include "auxiliary.h"
#include <cooperative_groups.h>
#ifndef __HIPCC__
#include <cooperative_groups/reduce.h>
#endif
namespace cg = cooperative_groups;


Expand Down Expand Up @@ -646,7 +648,7 @@
float* out_others,
float* max_blending)
{
renderCUDA<NUM_CHANNELS> << <grid, block >> > (
renderCUDA<NUM_CHANNELS> <<<grid, block>>> (
ranges,
point_list,
W, H,
Expand Down Expand Up @@ -704,7 +706,7 @@
uint32_t* tiles_touched,
bool prefiltered)
{
preprocessCUDA<NUM_CHANNELS> << <(P + 255) / 256, 256 >> > (
preprocessCUDA<NUM_CHANNELS> <<<(P + 255) / 256, 256>>> (
P, D, M,
triangles_points,
sigma,
Expand Down
2 changes: 2 additions & 0 deletions cuda_rasterizer/forward.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@

#include <cuda.h>
#include "cuda_runtime.h"
#ifndef __HIPCC__
#include "device_launch_parameters.h"
#endif
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

Expand Down
10 changes: 7 additions & 3 deletions cuda_rasterizer/rasterizer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,18 @@
#include <numeric>
#include <cuda.h>
#include "cuda_runtime.h"
#ifndef __HIPCC__
#include "device_launch_parameters.h"
#endif
#include <cub/cub.cuh>
#include <cub/device/device_radix_sort.cuh>
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

#include <cooperative_groups.h>
#ifndef __HIPCC__
#include <cooperative_groups/reduce.h>
#endif
namespace cg = cooperative_groups;

#include "auxiliary.h"
Expand Down Expand Up @@ -156,7 +160,7 @@
float* projmatrix,
bool* present)
{
checkFrustum << <(P + 255) / 256, 256 >> > (
checkFrustum <<<(P + 255) / 256, 256>>> (
P,
means3D,
viewmatrix, projmatrix,
Expand Down Expand Up @@ -320,7 +324,7 @@

// For each instance to be rendered, produce adequate [ tile | depth ] key
// and corresponding duplicated Triangle indices to be sorted
duplicateWithKeys << <(P + 255) / 256, 256 >> > (
duplicateWithKeys<<<(P + 255) / 256, 256>>> (
P,
geomState.means2D,
geomState.depths,
Expand Down Expand Up @@ -349,7 +353,7 @@

// Identify start and end of per-tile workloads in sorted list
if (num_rendered > 0)
identifyTileRanges << <(num_rendered + 255) / 256, 256 >> > (
identifyTileRanges <<<(num_rendered + 255) / 256, 256>>> (
num_rendered,
binningState.point_list_keys,
imgState.ranges);
Expand Down
2 changes: 2 additions & 0 deletions cuda_rasterizer/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

#include <cuda.h>
#include "cuda_runtime.h"
#ifndef __HIPCC__
#include "device_launch_parameters.h"
#endif

namespace UTILS
{
Expand Down
8 changes: 7 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
import os
import torch
os.path.dirname(os.path.abspath(__file__))

use_fast_math_flag = "--use_fast_math"
if torch.version.hip is not None:
# hipcc (clang) does not recognize --use_fast_math; use clang's flag
use_fast_math_flag = "-ffast-math"

setup(
name="diff_triangle_rasterization",
packages=['diff_triangle_rasterization'],
Expand All @@ -38,7 +44,7 @@
"cuda_rasterizer/utils.cu",
"rasterize_points.cu",
"ext.cpp"],
extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/"), "--use_fast_math"]})
extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/"), use_fast_math_flag]})
],
cmdclass={
'build_ext': BuildExtension
Expand Down