diff --git a/img/antialias_cornell_avocado_1.png b/img/antialias_cornell_avocado_1.png deleted file mode 100644 index 35e248b5..00000000 Binary files a/img/antialias_cornell_avocado_1.png and /dev/null differ diff --git a/scenes/avocado_cornell.txt b/scenes/avocado_cornell.txt index eb764e55..62f62923 100644 --- a/scenes/avocado_cornell.txt +++ b/scenes/avocado_cornell.txt @@ -72,7 +72,7 @@ EMITTANCE 0 CAMERA RES 800 800 FOVY 45 -ITERATIONS 2000 +ITERATIONS 20 DEPTH 8 FILE cornell EYE 0.0 5 4.9 diff --git a/scenes/metal.txt b/scenes/metal.txt index 8874ed6f..19c8befb 100644 --- a/scenes/metal.txt +++ b/scenes/metal.txt @@ -63,7 +63,7 @@ EMITTANCE 10 CAMERA RES 960 720 FOVY 40 -ITERATIONS 2000 +ITERATIONS 20 DEPTH 8 FILE cornell EYE 0.0 5 9 diff --git a/scenes/motorcycle.txt b/scenes/motorcycle.txt index b5c1420d..e8f928e8 100644 --- a/scenes/motorcycle.txt +++ b/scenes/motorcycle.txt @@ -63,7 +63,7 @@ EMITTANCE 10 CAMERA RES 960 720 FOVY 40 -ITERATIONS 2000 +ITERATIONS 500 DEPTH 8 FILE cornell EYE 0.0 5 9 diff --git a/src/main.cpp b/src/main.cpp index 7c2235c1..fd855f64 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -33,6 +33,8 @@ using namespace scene_structs; double totalIterTime = 0; +std::chrono::system_clock::time_point pathtraceStart; + //------------------------------- //-------------MAIN-------------- //------------------------------- @@ -95,7 +97,11 @@ int main(int argc, char** argv) { } void saveImage() { +#if DENOISE + float samples = 1.f; +#else float samples = iteration; +#endif // output image file image img(width, height); @@ -143,13 +149,17 @@ void runCuda() { if (iteration == 0) { pathtraceFree(); + + pathtraceStart = std::chrono::system_clock::now(); + pathtraceInit(scene); } + uchar4* pbo_dptr = NULL; + cudaGLMapBufferObject((void**)&pbo_dptr, pbo); + if (iteration < renderState->iterations) { - uchar4* pbo_dptr = NULL; iteration++; - cudaGLMapBufferObject((void**)&pbo_dptr, pbo); // execute the kernel int frame = 0; @@ -163,17 +173,34 @@ void runCuda() { #else pathtrace(pbo_dptr, frame, iteration); #endif - - // unmap buffer object - cudaGLUnmapBufferObject(pbo); } else { + +#if DENOISE + int filterSize = 80; + float colorWeight = 2; + float normalWeight = 0.12; + float positionWeight = 0.5; + + auto denoiseStart = std::chrono::system_clock::now(); + + denoiseAndWriteToPbo(pbo_dptr, iteration, filterSize, colorWeight, normalWeight, positionWeight); + + auto end = std::chrono::system_clock::now(); + + std::cout << "Total pathtrace run-time: " << ((std::chrono::duration) (end - pathtraceStart)).count() << std::endl; + std::cout << "Total denoise run-time: " << ((std::chrono::duration) (end - denoiseStart)).count() << std::endl; +#endif + saveImage(); pathtraceFree(); cudaDeviceReset(); - exit(EXIT_SUCCESS); + //exit(EXIT_SUCCESS); } + // unmap buffer object + cudaGLUnmapBufferObject(pbo); + #if MEASURE_PERF std::cout << "Total iter time " << totalIterTime << std::endl; #endif diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 4580e771..70c5fca7 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -6,6 +6,7 @@ #include #include #include +#include #include "sceneStructs.h" #include "scene.h" @@ -105,12 +106,54 @@ static Triangle* dev_triangles; #if BVH static BvhNode* dev_bvh; #endif +static glm::vec3* dev_image_denoised_in = NULL; // ping pong +static glm::vec3* dev_image_denoised_out = NULL; +static glm::ivec2* dev_offset = NULL; +static float* dev_kernel = NULL; +static GBufferPixel* dev_gBuffer = NULL; void InitDataContainer(GuiDataContainer* imGuiData) { guiData = imGuiData; } +void denoiseInit() { + int pixelcount = hst_scene->state.camera.resolution.x * hst_scene->state.camera.resolution.y; + cudaMalloc(&dev_image_denoised_in, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image_denoised_in, 0, pixelcount * sizeof(glm::vec3)); + + cudaMalloc(&dev_image_denoised_out, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image_denoised_out, 0, pixelcount * sizeof(glm::vec3)); + + cudaMalloc(&dev_offset, 25 * sizeof(glm::ivec2)); + glm::ivec2 offset[25]; + for (int i = 0, int y = 0; y < 5; ++y) { // read array from left to right, top to bottom + for (int x = 0; x < 5; ++x) { + offset[i++] = glm::ivec2(x - 2, y - 2); + } + } + cudaMemcpy(dev_offset, offset, 25 * sizeof(glm::ivec2), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_kernel, 25 * sizeof(float)); + float kernel[25] = + { 1.f / 256, 1.f / 64, 3.f / 128, 1.f / 64, 1.f / 256, + 1.f / 64, 1.f / 16, 3.f / 32, 1.f / 16, 1.f / 64, + 3.f / 128, 3.f / 32, 9.f / 64, 3.f / 32, 3.f / 128, + 1.f / 64, 1.f / 16, 3.f / 32, 1.f / 16, 1.f / 64, + 1.f / 256, 1.f / 64, 3.f / 128, 1.f / 64, 1.f / 256 }; + cudaMemcpy(dev_kernel, kernel, 25 * sizeof(float), cudaMemcpyHostToDevice); + + checkCUDAError("denoiseInit"); +} + +void denoiseFree() { + cudaFree(dev_image_denoised_in); + cudaFree(dev_image_denoised_out); + cudaFree(dev_kernel); + cudaFree(dev_offset); + checkCUDAError("denoiseFree"); +} + void pathtraceInit(Scene* scene) { hst_scene = scene; @@ -180,6 +223,8 @@ void pathtraceInit(Scene* scene) { checkCUDAError("cudaMemcpy of dev_bvh"); #endif + cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); + checkCUDAError("pathtraceInit"); } @@ -340,6 +385,28 @@ __device__ glm::vec3 getTextureColor(const DevImage& image, glm::vec3 *imageBuff //return glm::vec3(uv[0], uv[1], 0.5); } +__global__ void generateGBuffer ( + int num_paths, + ShadeableIntersection* shadeableIntersections, + PathSegment* pathSegments, + GBufferPixel* gBuffer) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + auto& intersect = shadeableIntersections[idx]; + gBuffer[idx].normal = intersect.surfaceNormal; + + if (intersect.t < 0) { + // Position doesn't matter too much since the colour is black anyway + gBuffer[idx].position = glm::vec3(0); + } + else { + auto& ray = pathSegments[idx].ray; + gBuffer[idx].position = ray.origin + ray.direction * intersect.t; + } + } +} + __global__ void shadeMaterial( int iter , int num_paths @@ -573,6 +640,19 @@ void pathtrace(uchar4* pbo, int frame, int iter) { #endif cudaDeviceSynchronize(); +#if DENOISE + if (depth == 0 && iter == 1) { + + auto start = std::chrono::system_clock::now(); + + generateGBuffer << > > (pixelcount, dev_intersections, dev_paths, dev_gBuffer); + cudaDeviceSynchronize(); + + auto end = std::chrono::system_clock::now(); + std::cout << "Total gbuffer run-time: " << ((std::chrono::duration) (end - start)).count() << std::endl; + } +#endif + depth++; // TODO: @@ -625,3 +705,135 @@ void pathtrace(uchar4* pbo, int frame, int iter) { checkCUDAError("pathtrace"); } + +void showImage(uchar4* pbo, int iter) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); +} + +__global__ void kernInitDenoiseBuffer(glm::vec3* image, glm::ivec2 resolution, float pathtraceIter, glm::vec3* image_denoised) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + if (!(x < resolution.x && y < resolution.y)) { + return; + } + int index = x + (y * resolution.x); + image_denoised[index] = image[index] / pathtraceIter; +} + +__device__ float getWeight(glm::vec3 v1, glm::vec3 v2, float sigma) { + glm::vec3 t = v1 - v2; + float dist_squared = glm::max(glm::dot(t, t), 0.0f); + return glm::min(exp(-dist_squared / (sigma * sigma)), 1.0f); +} + +__global__ void kernDenoise( + glm::ivec2 resolution, + GBufferPixel* gBuffer, + int stepWidth, + float* kernel, + glm::ivec2* offset, + float colorWeight, + float normalWeight, + float positionWeight, + glm::vec3* image_denoised_in, + glm::vec3* image_denoised_out +) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x >= resolution.x || y >= resolution.y) { + return; + } + + int index = x + (y * resolution.x); + + auto& color = image_denoised_in[index]; + auto& position = gBuffer[index].position; + auto& normal = gBuffer[index].normal; + + float cum_w = 0.0f; + glm::vec3 sum(0.f); + + for (int i = 0; i < 25; ++i) { + glm::ivec2 neighbourIdx = glm::ivec2(x, y) + offset[i] * stepWidth; + + if (neighbourIdx.x >= 0 && neighbourIdx.x < resolution.x + && neighbourIdx.y >= 0 && neighbourIdx.y < resolution.y) { + + int n = neighbourIdx.x + (neighbourIdx.y * resolution.x); + + auto& neighbourColor = image_denoised_in[n]; + auto& neighbourPos = gBuffer[n].position; + auto& neighbourNorm = gBuffer[n].normal; + + float c_w = getWeight(color, neighbourColor, colorWeight); + float p_w = getWeight(position, neighbourPos, positionWeight); + float n_w = getWeight(normal, neighbourNorm, normalWeight); + + float weight = c_w * n_w * p_w; + //weight = 1; + sum += kernel[i] * weight * neighbourColor; + cum_w += kernel[i] * weight; + } + } + + image_denoised_out[index] = sum / cum_w; +} + +void denoiseAndWriteToPbo( + uchar4* pbo, + int pathtraceIter, + int filterSize, + float colorWeight, + float normalWeight, + float positionWeight +) { + denoiseInit(); + + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + kernInitDenoiseBuffer << > > (dev_image, cam.resolution, pathtraceIter, dev_image_denoised_in); + + // filter size is size of window on the last iteration + int numDenoiseIters = glm::log2(filterSize / 5); + int stepWidth = 1; + + for (int i = 0; i < numDenoiseIters; ++i) { + kernDenoise << > > ( + cam.resolution, + dev_gBuffer, + stepWidth, + dev_kernel, + dev_offset, + colorWeight, + normalWeight, + positionWeight, + dev_image_denoised_in, + dev_image_denoised_out); + + // filter doubles every iter + stepWidth = stepWidth << 2; + // At each pass we set sigma rt = 2^{-i} * sigma_rt + // allowing for smaller illumination variations to be smoothed + colorWeight = colorWeight / stepWidth; + + std::swap(dev_image_denoised_in, dev_image_denoised_out); // most updated version is _in now + } + sendImageToPBO << > > (pbo, cam.resolution, 1, dev_image_denoised_in); + + cudaMemcpy(hst_scene->state.image.data(), dev_image_denoised_in, + cam.resolution.x * cam.resolution.y * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + + denoiseFree(); +} diff --git a/src/pathtrace.h b/src/pathtrace.h index e767d0ef..af0b0439 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -7,3 +7,11 @@ void InitDataContainer(GuiDataContainer* guiData); void pathtraceInit(Scene *scene); void pathtraceFree(); void pathtrace(uchar4 *pbo, int frame, int iteration); +void denoiseAndWriteToPbo( + uchar4* pbo, + int pathtraceIter, + int filterSize, + float colorWeight, + float normalWeight, + float positionWeight +); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index f78a16e2..3a5f1a90 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -11,9 +11,10 @@ #define BVH 1 #define ROUGHNESS_METALLIC 1 #define SORT_BY_MATERIALS 0 -// turn on at most ONE of first bounce caching and anti-aliasing +// turn on at most ONE of first bounce caching and anti-aliasing and denoising #define CACHE_FIRST_BOUNCE 0 -#define ANTI_ALIAS 1 +#define ANTI_ALIAS 0 +#define DENOISE 1 // for debugging #define SHOW_NORMALS 0 #define SHOW_METALLIC 0 @@ -134,4 +135,9 @@ struct ShadeableIntersection { int materialId; }; +struct GBufferPixel { + glm::vec3 normal; + glm::vec3 position; // todo: store t value instead and reconstruct position based on camera +}; + } \ No newline at end of file