Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cuda_rasterizer/auxiliary.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ __forceinline__ __device__ bool in_frustum(int idx,
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
__builtin_trap();
}
return false;
}
Expand Down Expand Up @@ -255,7 +255,7 @@ __forceinline__ __device__ bool in_frustum_triangle(int idx,
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
__builtin_trap();
}
return false;
}
Expand Down
7 changes: 3 additions & 4 deletions cuda_rasterizer/backward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
#include "backward.h"
#include "auxiliary.h"
#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>
namespace cg = cooperative_groups;
namespace cg = cooperative_groups;



Expand Down Expand Up @@ -762,7 +761,7 @@
// Propagate gradients for remaining steps: finish 3D mean gradients,
// propagate color gradients to SH (if desired), propagate 3D covariance
// matrix gradients to scale and rotation.
preprocessCUDA<NUM_CHANNELS> << < (P + 255) / 256, 256 >> > (
preprocessCUDA<NUM_CHANNELS> <<< (P + 255) / 256, 256 >>> (
P, D, M,
triangles_points,
W, H,
Expand Down Expand Up @@ -820,7 +819,7 @@
float* dL_dcolors,
float* dL_dsigma_factor)
{
renderCUDA<NUM_CHANNELS> << <grid, block >> >(
renderCUDA<NUM_CHANNELS> <<<grid, block >>>(
ranges,
point_list,
W, H,
Expand Down
3 changes: 1 addition & 2 deletions cuda_rasterizer/backward.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@

#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define GLM_FORCE_CUDA
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

namespace BACKWARD
Expand Down
7 changes: 3 additions & 4 deletions cuda_rasterizer/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
#include "forward.h"
#include "auxiliary.h"
#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>
namespace cg = cooperative_groups;
namespace cg = cooperative_groups;



Expand Down Expand Up @@ -646,7 +645,7 @@
float* out_others,
float* max_blending)
{
renderCUDA<NUM_CHANNELS> << <grid, block >> > (
renderCUDA<NUM_CHANNELS> <<<grid, block >>> (
ranges,
point_list,
W, H,
Expand Down Expand Up @@ -704,7 +703,7 @@
uint32_t* tiles_touched,
bool prefiltered)
{
preprocessCUDA<NUM_CHANNELS> << <(P + 255) / 256, 256 >> > (
preprocessCUDA<NUM_CHANNELS> <<<(P + 255) / 256, 256 >>> (
P, D, M,
triangles_points,
sigma,
Expand Down
3 changes: 1 addition & 2 deletions cuda_rasterizer/forward.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@

#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define GLM_FORCE_CUDA
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

namespace FORWARD
Expand Down
12 changes: 5 additions & 7 deletions cuda_rasterizer/rasterizer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,13 @@
#include <numeric>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cub/cub.cuh>
#include <cub/cub.cuh>
#include <cub/device/device_radix_sort.cuh>
#define GLM_FORCE_CUDA
#include <glm/glm.hpp>

#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>
namespace cg = cooperative_groups;
namespace cg = cooperative_groups;

#include "auxiliary.h"
#include "forward.h"
Expand Down Expand Up @@ -156,7 +154,7 @@
float* projmatrix,
bool* present)
{
checkFrustum << <(P + 255) / 256, 256 >> > (
checkFrustum <<<(P + 255) / 256, 256 >>> (
P,
means3D,
viewmatrix, projmatrix,
Expand Down Expand Up @@ -320,7 +318,7 @@

// For each instance to be rendered, produce adequate [ tile | depth ] key
// and corresponding duplicated Triangle indices to be sorted
duplicateWithKeys << <(P + 255) / 256, 256 >> > (
duplicateWithKeys <<<(P + 255) / 256, 256 >>> (
P,
geomState.means2D,
geomState.depths,
Expand Down Expand Up @@ -349,7 +347,7 @@

// Identify start and end of per-tile workloads in sorted list
if (num_rendered > 0)
identifyTileRanges << <(num_rendered + 255) / 256, 256 >> > (
identifyTileRanges <<<(num_rendered + 255) / 256, 256 >>> (
num_rendered,
binningState.point_list_keys,
imgState.ranges);
Expand Down
2 changes: 1 addition & 1 deletion cuda_rasterizer/rasterizer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
// Carves an aligned array of `count` elements of type T out of a raw byte
// buffer and advances the buffer cursor past it.
//
//   chunk     - in/out cursor into the buffer; on return it points one past
//               the last element of the carved array.
//   ptr       - out; receives the aligned start address of the array.
//   count     - number of T elements to reserve.
//   alignment - required alignment in bytes; the mask arithmetic below only
//               rounds correctly when this is a power of two.
//
// No bounds check is performed here; the caller must have sized the buffer.
//
// NOTE(review): this hunk is reported as "1 addition & 1 deletion", so the two
// `offset` lines below are the removed (std::uintptr_t) and added (uintptr_t)
// sides of the same statement; only one of them exists in the real header.
template <typename T>
static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment)
{
// Round the current address up to the next multiple of `alignment`.
std::size_t offset = (reinterpret_cast<std::uintptr_t>(chunk) + alignment - 1) & ~(alignment - 1);
std::size_t offset = (reinterpret_cast<uintptr_t>(chunk) + alignment - 1) & ~(alignment - 1);
ptr = reinterpret_cast<T*>(offset);
// Pointer arithmetic on T* skips count * sizeof(T) bytes.
chunk = reinterpret_cast<char*>(ptr + count);
}
Expand Down
1 change: 0 additions & 1 deletion cuda_rasterizer/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

namespace UTILS
{
Expand Down
20 changes: 16 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,22 @@
# For inquiries contact jan.held@uliege.be
#

from pathlib import Path

import torch
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
import os
os.path.dirname(os.path.abspath(__file__))
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# Absolute path of the directory that contains this setup.py.
ROOT = Path(__file__).resolve().parent
# Location of the GLM headers under third_party/, relative to ROOT.
GLM_INCLUDE = ROOT / "third_party" / "glm"
# True when torch.version has a truthy `hip` attribute
# (presumably a ROCm/HIP build of PyTorch -- confirm against the PyTorch docs).
IS_ROCM = bool(getattr(torch.version, "hip", None))


def _nvcc_flags():
    """Return the list of extra compiler flags for the extension build.

    The list always starts with an ``-I`` flag pointing at ``GLM_INCLUDE``.
    ``--use_fast_math`` is appended only when ``IS_ROCM`` is false.
    """
    include_flag = f"-I{GLM_INCLUDE}"
    if IS_ROCM:
        # ROCm build: only the include path is passed.
        return [include_flag]
    return [include_flag, "--use_fast_math"]

setup(
name="diff_triangle_rasterization",
Expand All @@ -38,7 +50,7 @@
"cuda_rasterizer/utils.cu",
"rasterize_points.cu",
"ext.cpp"],
extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/"), "--use_fast_math"]})
extra_compile_args={"nvcc": _nvcc_flags()})
],
cmdclass={
'build_ext': BuildExtension
Expand Down