Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed img/antialias_cornell_avocado_1.png
Binary file not shown.
2 changes: 1 addition & 1 deletion scenes/avocado_cornell.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ EMITTANCE 0
CAMERA
RES 800 800
FOVY 45
ITERATIONS 2000
ITERATIONS 20
DEPTH 8
FILE cornell
EYE 0.0 5 4.9
Expand Down
2 changes: 1 addition & 1 deletion scenes/metal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ EMITTANCE 10
CAMERA
RES 960 720
FOVY 40
ITERATIONS 2000
ITERATIONS 20
DEPTH 8
FILE cornell
EYE 0.0 5 9
Expand Down
2 changes: 1 addition & 1 deletion scenes/motorcycle.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ EMITTANCE 10
CAMERA
RES 960 720
FOVY 40
ITERATIONS 2000
ITERATIONS 500
DEPTH 8
FILE cornell
EYE 0.0 5 9
Expand Down
39 changes: 33 additions & 6 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ using namespace scene_structs;

// Accumulated iteration time; printed by runCuda() when MEASURE_PERF is enabled.
double totalIterTime = 0;

// Wall-clock timestamp taken in runCuda() when iteration == 0, right before
// pathtraceInit(); used to report the total path-trace run-time once rendering finishes.
std::chrono::system_clock::time_point pathtraceStart;

//-------------------------------
//-------------MAIN--------------
//-------------------------------
Expand Down Expand Up @@ -95,7 +97,11 @@ int main(int argc, char** argv) {
}

void saveImage() {
#if DENOISE
float samples = 1.f;
#else
float samples = iteration;
#endif
// output image file
image img(width, height);

Expand Down Expand Up @@ -143,13 +149,17 @@ void runCuda() {

if (iteration == 0) {
pathtraceFree();

pathtraceStart = std::chrono::system_clock::now();

pathtraceInit(scene);
}

uchar4* pbo_dptr = NULL;
cudaGLMapBufferObject((void**)&pbo_dptr, pbo);

if (iteration < renderState->iterations) {
uchar4* pbo_dptr = NULL;
iteration++;
cudaGLMapBufferObject((void**)&pbo_dptr, pbo);

// execute the kernel
int frame = 0;
Expand All @@ -163,17 +173,34 @@ void runCuda() {
#else
pathtrace(pbo_dptr, frame, iteration);
#endif

// unmap buffer object
cudaGLUnmapBufferObject(pbo);
}
else {

#if DENOISE
int filterSize = 80;
float colorWeight = 2;
float normalWeight = 0.12;
float positionWeight = 0.5;

auto denoiseStart = std::chrono::system_clock::now();

denoiseAndWriteToPbo(pbo_dptr, iteration, filterSize, colorWeight, normalWeight, positionWeight);

auto end = std::chrono::system_clock::now();

std::cout << "Total pathtrace run-time: " << ((std::chrono::duration<double>) (end - pathtraceStart)).count() << std::endl;
std::cout << "Total denoise run-time: " << ((std::chrono::duration<double>) (end - denoiseStart)).count() << std::endl;
#endif

saveImage();
pathtraceFree();
cudaDeviceReset();
exit(EXIT_SUCCESS);
//exit(EXIT_SUCCESS);
}

// unmap buffer object
cudaGLUnmapBufferObject(pbo);

#if MEASURE_PERF
std::cout << "Total iter time " << totalIterTime << std::endl;
#endif
Expand Down
212 changes: 212 additions & 0 deletions src/pathtrace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <thrust/remove.h>
#include <thrust/partition.h>
#include <thrust/device_vector.h>
#include <chrono>

#include "sceneStructs.h"
#include "scene.h"
Expand Down Expand Up @@ -105,12 +106,54 @@ static Triangle* dev_triangles;
#if BVH
static BvhNode* dev_bvh;
#endif
// Denoiser working set on the device.
// dev_image_denoised_in / dev_image_denoised_out are swapped after every filter pass
// in denoiseAndWriteToPbo(), so "_in" always names the most recent result.
static glm::vec3* dev_image_denoised_in = NULL; // ping pong
static glm::vec3* dev_image_denoised_out = NULL;
// 25 (x, y) tap offsets of the 5x5 filter window, filled by denoiseInit().
static glm::ivec2* dev_offset = NULL;
// 25 filter weights matching dev_offset entry-for-entry, filled by denoiseInit().
static float* dev_kernel = NULL;
// Per-pixel normal/position buffer; allocated in pathtraceInit() and written by generateGBuffer.
static GBufferPixel* dev_gBuffer = NULL;

// Registers the GUI data container by storing the given pointer in the
// file-level `guiData` variable. No copy is made and ownership is not taken here.
void InitDataContainer(GuiDataContainer* imGuiData) {
    guiData = imGuiData;
}

/**
 * Allocates and initialises the device buffers used by the denoiser:
 *   - dev_image_denoised_in / dev_image_denoised_out: one glm::vec3 per pixel, zero-filled;
 *   - dev_offset: the 25 (x, y) tap offsets of a 5x5 window, each component in [-2, 2];
 *   - dev_kernel: the 25 filter weights, in the same order as dev_offset.
 *
 * Reads the image resolution from hst_scene, so pathtraceInit() must have set
 * hst_scene beforehand. Pair every call with denoiseFree().
 */
void denoiseInit() {
    const int pixelcount = hst_scene->state.camera.resolution.x * hst_scene->state.camera.resolution.y;
    // Do the byte-size arithmetic in size_t rather than int.
    const size_t imageBytes = static_cast<size_t>(pixelcount) * sizeof(glm::vec3);

    cudaMalloc(&dev_image_denoised_in, imageBytes);
    cudaMemset(dev_image_denoised_in, 0, imageBytes);

    cudaMalloc(&dev_image_denoised_out, imageBytes);
    cudaMemset(dev_image_denoised_out, 0, imageBytes);

    // Tap offsets, read left to right, top to bottom.
    // NOTE: the original loop header `for (int i = 0, int y = 0; ...)` is not valid
    // standard C++ (a declaration cannot repeat the type specifier after a comma),
    // so the running index is declared separately.
    cudaMalloc(&dev_offset, 25 * sizeof(glm::ivec2));
    glm::ivec2 offset[25];
    int i = 0;
    for (int y = 0; y < 5; ++y) {
        for (int x = 0; x < 5; ++x) {
            offset[i++] = glm::ivec2(x - 2, y - 2);
        }
    }
    cudaMemcpy(dev_offset, offset, 25 * sizeof(glm::ivec2), cudaMemcpyHostToDevice);

    // 5x5 weights: the outer product of (1/16, 1/4, 3/8, 1/4, 1/16) with itself,
    // laid out row by row to match the offset table above.
    cudaMalloc(&dev_kernel, 25 * sizeof(float));
    float kernel[25] =
    { 1.f / 256, 1.f / 64, 3.f / 128, 1.f / 64, 1.f / 256,
      1.f / 64,  1.f / 16, 3.f / 32,  1.f / 16, 1.f / 64,
      3.f / 128, 3.f / 32, 9.f / 64,  3.f / 32, 3.f / 128,
      1.f / 64,  1.f / 16, 3.f / 32,  1.f / 16, 1.f / 64,
      1.f / 256, 1.f / 64, 3.f / 128, 1.f / 64, 1.f / 256 };
    cudaMemcpy(dev_kernel, kernel, 25 * sizeof(float), cudaMemcpyHostToDevice);

    checkCUDAError("denoiseInit");
}

/**
 * Releases the buffers allocated by denoiseInit() and resets the corresponding
 * globals to NULL, so that no dangling device pointer survives the call and a
 * repeated denoiseFree() is a harmless no-op (cudaFree(NULL) is allowed).
 *
 * dev_gBuffer is intentionally not touched here: it is allocated in
 * pathtraceInit(), not in denoiseInit().
 */
void denoiseFree() {
    cudaFree(dev_image_denoised_in);
    dev_image_denoised_in = NULL;

    cudaFree(dev_image_denoised_out);
    dev_image_denoised_out = NULL;

    cudaFree(dev_kernel);
    dev_kernel = NULL;

    cudaFree(dev_offset);
    dev_offset = NULL;

    checkCUDAError("denoiseFree");
}

void pathtraceInit(Scene* scene) {
hst_scene = scene;

Expand Down Expand Up @@ -180,6 +223,8 @@ void pathtraceInit(Scene* scene) {
checkCUDAError("cudaMemcpy of dev_bvh");
#endif

cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel));

checkCUDAError("pathtraceInit");
}

Expand Down Expand Up @@ -340,6 +385,28 @@ __device__ glm::vec3 getTextureColor(const DevImage& image, glm::vec3 *imageBuff
//return glm::vec3(uv[0], uv[1], 0.5);
}

/**
 * Fills one G-buffer entry per path from that path's intersection record.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per path; threads whose flat
 * index is >= num_paths do nothing.
 *
 * For every path the surface normal is copied as-is. The position is the hit
 * point (ray origin + direction * t) when t >= 0, and the zero vector when
 * t < 0 (original author's note: the position of such a pixel matters little
 * because its colour is black anyway).
 */
__global__ void generateGBuffer(
    int num_paths,
    ShadeableIntersection* shadeableIntersections,
    PathSegment* pathSegments,
    GBufferPixel* gBuffer) {
    const int pathIdx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pathIdx >= num_paths) {
        return;
    }

    const auto& hit = shadeableIntersections[pathIdx];
    auto& pixel = gBuffer[pathIdx];

    pixel.normal = hit.surfaceNormal;

    if (hit.t < 0) {
        // Negative t: no hit point to reconstruct, store the origin instead.
        pixel.position = glm::vec3(0);
        return;
    }

    const auto& ray = pathSegments[pathIdx].ray;
    pixel.position = ray.origin + ray.direction * hit.t;
}

__global__ void shadeMaterial(
int iter
, int num_paths
Expand Down Expand Up @@ -573,6 +640,19 @@ void pathtrace(uchar4* pbo, int frame, int iter) {
#endif

cudaDeviceSynchronize();
#if DENOISE
if (depth == 0 && iter == 1) {

auto start = std::chrono::system_clock::now();

generateGBuffer << <numblocksPathSegmentTracing, blockSize1d >> > (pixelcount, dev_intersections, dev_paths, dev_gBuffer);
cudaDeviceSynchronize();

auto end = std::chrono::system_clock::now();
std::cout << "Total gbuffer run-time: " << ((std::chrono::duration<double>) (end - start)).count() << std::endl;
}
#endif

depth++;

// TODO:
Expand Down Expand Up @@ -625,3 +705,135 @@ void pathtrace(uchar4* pbo, int frame, int iter) {

checkCUDAError("pathtrace");
}

/**
 * Pushes the accumulated path-traced image (dev_image) to the OpenGL pixel
 * buffer by launching sendImageToPBO over the whole camera resolution.
 *
 * @param pbo   destination pixel buffer passed through to sendImageToPBO
 * @param iter  iteration count passed through to sendImageToPBO
 */
void showImage(uchar4* pbo, int iter) {
    const Camera& camera = hst_scene->state.camera;

    // 8x8 thread tiles; the grid is rounded up so partial tiles at the edges are covered.
    const dim3 tile(8, 8);
    const dim3 grid(
        (camera.resolution.x + tile.x - 1) / tile.x,
        (camera.resolution.y + tile.y - 1) / tile.y);

    sendImageToPBO<<<grid, tile>>>(pbo, camera.resolution, iter, dev_image);
}

/**
 * Seeds the denoiser input: image_denoised[p] = image[p] / pathtraceIter for
 * every pixel p inside `resolution`.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that fall
 * outside the image return without touching memory.
 */
__global__ void kernInitDenoiseBuffer(glm::vec3* image, glm::ivec2 resolution, float pathtraceIter, glm::vec3* image_denoised) {
    const int px = blockIdx.x * blockDim.x + threadIdx.x;
    const int py = blockIdx.y * blockDim.y + threadIdx.y;

    if (px >= resolution.x || py >= resolution.y) {
        return;
    }

    const int pixel = px + py * resolution.x;
    image_denoised[pixel] = image[pixel] / pathtraceIter;
}

/**
 * Edge-stopping weight between two feature vectors:
 *   min(exp(-|v1 - v2|^2 / sigma^2), 1)
 * The squared distance is clamped to be non-negative before use.
 *
 * @param v1, v2  feature vectors to compare (colour, normal or position)
 * @param sigma   falloff control; larger values give weights closer to 1
 */
__device__ float getWeight(glm::vec3 v1, glm::vec3 v2, float sigma) {
    const glm::vec3 delta = v1 - v2;
    const float distSq = glm::max(glm::dot(delta, delta), 0.0f);
    const float falloff = exp(-distSq / (sigma * sigma));
    return glm::min(falloff, 1.0f);
}

/**
 * One pass of the 25-tap edge-aware filter.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads outside
 * `resolution` return immediately.
 *
 * For each pixel, the 25 taps at offset[i] * stepWidth are visited. Taps that
 * land outside the image are skipped. Each remaining tap contributes its
 * colour from image_denoised_in, weighted by kernel[i] times the product of
 * three getWeight() terms (colour, normal, position) computed against the
 * centre pixel. The weighted colour sum divided by the accumulated weight is
 * written to image_denoised_out.
 *
 * image_denoised_in is only read and image_denoised_out only written; the
 * caller passes two distinct buffers.
 */
__global__ void kernDenoise(
    glm::ivec2 resolution,
    GBufferPixel* gBuffer,
    int stepWidth,
    float* kernel,
    glm::ivec2* offset,
    float colorWeight,
    float normalWeight,
    float positionWeight,
    glm::vec3* image_denoised_in,
    glm::vec3* image_denoised_out
) {
    const int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    const int y = (blockIdx.y * blockDim.y) + threadIdx.y;

    const bool insideImage = x < resolution.x && y < resolution.y;
    if (!insideImage) {
        return;
    }

    const int centre = x + (y * resolution.x);

    const glm::vec3& centreColor = image_denoised_in[centre];
    const glm::vec3& centrePos = gBuffer[centre].position;
    const glm::vec3& centreNorm = gBuffer[centre].normal;

    float weightTotal = 0.0f;
    glm::vec3 colorTotal(0.f);

    for (int tap = 0; tap < 25; ++tap) {
        const glm::ivec2 q = glm::ivec2(x, y) + offset[tap] * stepWidth;

        // Skip taps that fall off the image.
        if (q.x < 0 || q.x >= resolution.x || q.y < 0 || q.y >= resolution.y) {
            continue;
        }

        const int tapIndex = q.x + (q.y * resolution.x);

        const glm::vec3& tapColor = image_denoised_in[tapIndex];
        const glm::vec3& tapPos = gBuffer[tapIndex].position;
        const glm::vec3& tapNorm = gBuffer[tapIndex].normal;

        const float wColor = getWeight(centreColor, tapColor, colorWeight);
        const float wPos = getWeight(centrePos, tapPos, positionWeight);
        const float wNorm = getWeight(centreNorm, tapNorm, normalWeight);

        const float weight = wColor * wNorm * wPos;

        colorTotal += kernel[tap] * weight * tapColor;
        weightTotal += kernel[tap] * weight;
    }

    image_denoised_out[centre] = colorTotal / weightTotal;
}

/**
 * Denoises the accumulated path-traced image and publishes the result.
 *
 * Steps:
 *   1. Allocate the denoiser buffers (denoiseInit).
 *   2. Normalise dev_image by the number of accumulated samples into the
 *      ping-pong input buffer.
 *   3. Run floor(log2(filterSize / 5)) passes of kernDenoise, doubling the tap
 *      spacing and halving the colour sigma after every pass.
 *   4. Send the final image to the PBO (already normalised, hence iter = 1) and
 *      copy it into hst_scene->state.image on the host.
 *   5. Release the denoiser buffers (denoiseFree).
 *
 * @param pbo             mapped OpenGL pixel buffer to receive the result
 * @param pathtraceIter   number of samples accumulated in dev_image; values < 1
 *                        are treated as 1 to avoid dividing by zero
 * @param filterSize      approximate window size of the last pass, in pixels;
 *                        values below 10 result in zero filter passes
 * @param colorWeight     colour sigma of the first pass
 * @param normalWeight    normal sigma (constant across passes)
 * @param positionWeight  position sigma (constant across passes)
 */
void denoiseAndWriteToPbo(
    uchar4* pbo,
    int pathtraceIter,
    int filterSize,
    float colorWeight,
    float normalWeight,
    float positionWeight
) {
    denoiseInit();

    const Camera& cam = hst_scene->state.camera;
    const dim3 blockSize2d(8, 8);
    const dim3 blocksPerGrid2d(
        (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
        (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);

    // Guard the per-pixel division against a zero (or negative) sample count.
    const float sampleCount = static_cast<float>(pathtraceIter > 0 ? pathtraceIter : 1);
    kernInitDenoiseBuffer<<<blocksPerGrid2d, blockSize2d>>>(dev_image, cam.resolution, sampleCount, dev_image_denoised_in);
    checkCUDAError("kernInitDenoiseBuffer");

    // filter size is size of window on the last iteration.
    // Integer floor(log2(filterSize / 5)); unlike a floating-point log2 this is
    // well defined (zero passes) when filterSize / 5 is 0.
    int numDenoiseIters = 0;
    for (int window = filterSize / 5; window > 1; window >>= 1) {
        ++numDenoiseIters;
    }

    int stepWidth = 1;

    for (int i = 0; i < numDenoiseIters; ++i) {
        kernDenoise<<<blocksPerGrid2d, blockSize2d>>>(
            cam.resolution,
            dev_gBuffer,
            stepWidth,
            dev_kernel,
            dev_offset,
            colorWeight,
            normalWeight,
            positionWeight,
            dev_image_denoised_in,
            dev_image_denoised_out);

        // filter doubles every iter (the original `<< 2` quadrupled it)
        stepWidth <<= 1;
        // At each pass we set sigma_rt = 2^{-i} * sigma_rt,
        // allowing for smaller illumination variations to be smoothed.
        // Halving per pass gives exactly 2^{-i}; dividing by stepWidth compounded
        // the reduction far faster than intended.
        colorWeight *= 0.5f;

        std::swap(dev_image_denoised_in, dev_image_denoised_out); // most updated version is _in now
    }
    checkCUDAError("kernDenoise");

    sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>>(pbo, cam.resolution, 1, dev_image_denoised_in);

    cudaMemcpy(hst_scene->state.image.data(), dev_image_denoised_in,
        cam.resolution.x * cam.resolution.y * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
    checkCUDAError("denoiseAndWriteToPbo");

    denoiseFree();
}
8 changes: 8 additions & 0 deletions src/pathtrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,11 @@ void InitDataContainer(GuiDataContainer* guiData);
void pathtraceInit(Scene *scene);
void pathtraceFree();
void pathtrace(uchar4 *pbo, int frame, int iteration);
// Denoises the accumulated path-traced image, writes the result to the given
// pixel buffer and copies it into the scene's host-side image.
//   pbo            - mapped OpenGL pixel buffer that receives the denoised image
//   pathtraceIter  - number of path-trace iterations accumulated so far (used to normalise the image)
//   filterSize     - size of the filter window on the last denoise pass
//   colorWeight    - sigma for the colour edge-stopping term
//   normalWeight   - sigma for the normal edge-stopping term
//   positionWeight - sigma for the position edge-stopping term
void denoiseAndWriteToPbo(
uchar4* pbo,
int pathtraceIter,
int filterSize,
float colorWeight,
float normalWeight,
float positionWeight
);
10 changes: 8 additions & 2 deletions src/sceneStructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
#define BVH 1
#define ROUGHNESS_METALLIC 1
#define SORT_BY_MATERIALS 0
// turn on at most ONE of first bounce caching and anti-aliasing
// turn on at most ONE of: first-bounce caching, anti-aliasing, denoising
#define CACHE_FIRST_BOUNCE 0
#define ANTI_ALIAS 1
#define ANTI_ALIAS 0
#define DENOISE 1
// for debugging
#define SHOW_NORMALS 0
#define SHOW_METALLIC 0
Expand Down Expand Up @@ -134,4 +135,9 @@ struct ShadeableIntersection {
int materialId;
};

// Per-pixel geometry record used by the denoiser's edge-stopping weights.
// Written by generateGBuffer and read by kernDenoise.
struct GBufferPixel {
// Surface normal copied from the pixel's intersection record.
glm::vec3 normal;
// Hit point (ray origin + direction * t), or the zero vector when t < 0.
glm::vec3 position; // todo: store t value instead and reconstruct position based on camera
};

}