diff --git a/README.md b/README.md index 110697c..120b56a 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,44 @@ CUDA Path Tracer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Fengkai Wu +* Tested on: Windows 7, i7-6700 @ 3.40GHz 16GB, Quadro K620 4095MB (Moore 100C Lab) -### (TODO: Your README) +## Results +![result_img](https://github.com/wufk/Project3-CUDA-Path-Tracer/blob/master/img/final.PNG) -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +Features: + +1. Shading with BSDF evaluation + +2. Path termination using stream compaction + +3. Toggleable option to cache the first bounce and sort path segments by materials + +4. Refraction with Fresnel effects using Schlick's approximation + +5. Stochastic antialiasing + +## Analysis + +### Antialiasing + +Before: +![nojitter_img](https://github.com/wufk/Project3-CUDA-Path-Tracer/blob/master/img/nonjitter.PNG) + +After jittering: +![jitter_img](https://github.com/wufk/Project3-CUDA-Path-Tracer/blob/master/img/jitter.PNG) + +By adding a uniform random value to the ray, the aliasing effect is removed. As you can see from the picture, the edges of the cube and the wall are smoothed. + +### Sorting materials +![sort_img](https://github.com/wufk/Project3-CUDA-Path-Tracer/blob/master/img/sort.PNG) + +The sorting is applied to the ray/path arrays with respect to their materials. It is performed right after computing intersections. However, it increases the running time, primarily due to this additional operation. Making rays/paths contiguous in memory by sorting them by material does not seem to be a good choice. The reason might be that each path is independent and the kernel does not access each pixel by material type. 
+ +### Caching first bounce + +![cache_img](https://github.com/wufk/Project3-CUDA-Path-Tracer/blob/master/img/cache.PNG) + +The outcome of the first iteration of the path tracing is cached on the device and reused for the subsequent bouncing. The graph above shows that it does indeed increase performance, but at a constant rate. Reloading the cache for reuse also has a high cost. diff --git a/img/cache.PNG b/img/cache.PNG new file mode 100644 index 0000000..983f3aa Binary files /dev/null and b/img/cache.PNG differ diff --git a/img/final.PNG b/img/final.PNG new file mode 100644 index 0000000..302828d Binary files /dev/null and b/img/final.PNG differ diff --git a/img/jitter.PNG b/img/jitter.PNG new file mode 100644 index 0000000..b23e410 Binary files /dev/null and b/img/jitter.PNG differ diff --git a/img/nonjitter.PNG b/img/nonjitter.PNG new file mode 100644 index 0000000..1b03065 Binary files /dev/null and b/img/nonjitter.PNG differ diff --git a/img/sort.PNG b/img/sort.PNG new file mode 100644 index 0000000..4237b2d Binary files /dev/null and b/img/sort.PNG differ diff --git a/scenes/cornell.txt b/scenes/cornell.txt index 83ff820..4c1aab2 100644 --- a/scenes/cornell.txt +++ b/scenes/cornell.txt @@ -48,6 +48,26 @@ REFR 0 REFRIOR 0 EMITTANCE 0 +// Transparent white +MATERIAL 5 +RGB .7 .6 .6 +SPECEX 0 +SPECRGB .7 .6 .6 +REFL 0 +REFR 1.5 +REFRIOR 1.4 +EMITTANCE 0 + +// Diffuse blue +MATERIAL 6 +RGB .1 .5 .9 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + // Camera CAMERA RES 800 800 @@ -112,6 +132,38 @@ SCALE .01 10 10 OBJECT 6 sphere material 4 -TRANS -1 4 -1 +TRANS -1 1.5 -1 ROTAT 0 0 0 SCALE 3 3 3 + +// Sphere +OBJECT 7 +sphere +material 5 +TRANS 3 1.5 -1 +ROTAT 0 0 0 +SCALE 2 2 2 + +// right wall light +OBJECT 8 +cube +material 0 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .3 3 3 + +// cube +//OBJECT 9 +//cube +//material 4 +//TRANS 0 1 -2 +//ROTAT 0 30 30 +//SCALE 2 2 2 + +// cube +OBJECT 9 +cube +material 6 +TRANS -3 0 1 +ROTAT 30 0 0 +SCALE 2 2 2 diff --git 
a/src/interactions.h b/src/interactions.h index 5ce3628..9e87850 100644 --- a/src/interactions.h +++ b/src/interactions.h @@ -41,6 +41,69 @@ glm::vec3 calculateRandomDirectionInHemisphere( + sin(around) * over * perpendicularDirection2; } +__host__ __device__ float schlick(float costheta, float n1, float n2) +{ + float R0 = (n1 - n2) / (n1 + n2); + R0 *= R0; + return R0 + (1 - R0) * pow((1 - costheta), 5); +} + +__host__ __device__ void reflect( + PathSegment & pathSegment, + glm::vec3 intersect, + glm::vec3 &normal, + const Material &m +) +{ + pathSegment.ray.direction = glm::reflect(pathSegment.ray.direction, normal); + pathSegment.ray.direction = glm::normalize(pathSegment.ray.direction); + pathSegment.ray.origin = intersect + pathSegment.ray.direction * 0.001f; + pathSegment.color *= m.color; + pathSegment.remainingBounces--; +} + +__host__ __device__ void refract( + PathSegment & pathSegment, + glm::vec3 intersect, + glm::vec3 &normal, + const Material &m, + thrust::default_random_engine &rng) +{ + float n1, n2; + float cosTheta, eta; + float fresnel; + + n1 = 1.0f; + n2 = m.indexOfRefraction; + cosTheta = glm::dot(pathSegment.ray.direction, normal); + + if (cosTheta > .0f) + { + normal = -normal; + eta = n2 / n1; + } + else + { + eta = n1 / n2; + } + + thrust::uniform_real_distribution u01(0, 1); + fresnel = schlick(fabs(cosTheta), n1, n2); + if (u01(rng) < fresnel) + { + pathSegment.ray.direction = glm::reflect(pathSegment.ray.direction, normal); + pathSegment.color *= m.color; + } + else + { + pathSegment.ray.direction = glm::refract(pathSegment.ray.direction, normal, eta); + } + + pathSegment.ray.origin = intersect + pathSegment.ray.direction * 0.001f; + pathSegment.ray.direction = glm::normalize(pathSegment.ray.direction); + pathSegment.remainingBounces--; +} + /** * Scatter a ray with some probabilities according to the material properties. * For example, a diffuse surface scatters in a cosine-weighted hemisphere. 
@@ -70,10 +133,42 @@ __host__ __device__ void scatterRay( PathSegment & pathSegment, glm::vec3 intersect, - glm::vec3 normal, + glm::vec3 &normal, const Material &m, thrust::default_random_engine &rng) { // TODO: implement this. // A basic implementation of pure-diffuse shading will just call the // calculateRandomDirectionInHemisphere defined above. + + if (glm::dot(pathSegment.ray.direction, normal) > 0.0f && m.hasRefractive <= 0.001f) + { + pathSegment.color = glm::vec3(0.0f); + pathSegment.remainingBounces = 0; + return; + } + if (m.hasReflective > 0.0f) + { + reflect(pathSegment, intersect, normal, m); + } + else if (m.hasRefractive > 0.0f) + { + refract(pathSegment, intersect, normal, m, rng); + } + else if (m.emittance > 0.0f) + { + pathSegment.color *= m.color * m.emittance; + pathSegment.remainingBounces = 0; + } + else + { + + //PathSegment temp = pathSegment; + + pathSegment.ray.direction = calculateRandomDirectionInHemisphere(normal, rng); + pathSegment.ray.direction = glm::normalize(pathSegment.ray.direction); + pathSegment.ray.origin = intersect + pathSegment.ray.direction * 0.001f; + pathSegment.color *= m.color; + pathSegment.remainingBounces--; + } + } diff --git a/src/intersections.h b/src/intersections.h index 6f23872..58fef7c 100644 --- a/src/intersections.h +++ b/src/intersections.h @@ -136,9 +136,9 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r, intersectionPoint = multiplyMV(sphere.transform, glm::vec4(objspaceIntersection, 1.f)); normal = glm::normalize(multiplyMV(sphere.invTranspose, glm::vec4(objspaceIntersection, 0.f))); - if (!outside) { - normal = -normal; - } + //if (!outside) { + // normal = -normal; + //} return glm::length(r.origin - intersectionPoint); } diff --git a/src/main.cpp b/src/main.cpp index fe8e85e..169a49f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -23,6 +23,10 @@ Scene *scene; RenderState *renderState; int iteration; +double totalTimeMs = 0.0f; +float iterationTimeMs; +double 
totalElapsedTimeMs = 0.0; + int width; int height; @@ -134,8 +138,21 @@ void runCuda() { // execute the kernel int frame = 0; + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start); + pathtrace(pbo_dptr, frame, iteration); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&iterationTimeMs, start, stop); + totalElapsedTimeMs += iterationTimeMs; + + if (iteration % 50 == 0) { + totalTimeMs = totalElapsedTimeMs; + } // unmap buffer object cudaGLUnmapBufferObject(pbo); } else { diff --git a/src/main.h b/src/main.h index fdb7d5d..cd1bb38 100644 --- a/src/main.h +++ b/src/main.h @@ -28,6 +28,8 @@ using namespace std; extern Scene* scene; extern int iteration; +extern float iterationTimeMs; +extern double totalTimeMs; extern int width; extern int height; diff --git a/src/pathtrace.cu b/src/pathtrace.cu index c1ec122..47d31a1 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -13,6 +13,11 @@ #include "pathtrace.h" #include "intersections.h" #include "interactions.h" +#include "device_launch_parameters.h" + +#include +#include +#include #define ERRORCHECK 1 @@ -73,6 +78,10 @@ static Geom * dev_geoms = NULL; static Material * dev_materials = NULL; static PathSegment * dev_paths = NULL; static ShadeableIntersection * dev_intersections = NULL; +static bool * dev_flag = NULL; +static int *dev_pathMaterials = nullptr; +static PathSegment * dev_cachePaths = NULL; +static ShadeableIntersection * dev_cacheIntersections = NULL; // TODO: static variables for device memory, any extra info you need, etc // ... 
@@ -96,6 +105,13 @@ void pathtraceInit(Scene *scene) { cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_flag, pixelcount * sizeof(bool)); + + cudaMalloc(&dev_cachePaths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_cacheIntersections, pixelcount * sizeof(ShadeableIntersection)); + + cudaMalloc(&dev_pathMaterials, pixelcount * sizeof(unsigned)); + cudaMemset(dev_pathMaterials, 0, pixelcount * sizeof(unsigned)); checkCUDAError("pathtraceInit"); } @@ -107,6 +123,10 @@ void pathtraceFree() { cudaFree(dev_materials); cudaFree(dev_intersections); // TODO: clean up any extra device memory you created + cudaFree(dev_flag); + cudaFree(dev_pathMaterials); + cudaFree(dev_cachePaths); + cudaFree(dev_cacheIntersections); checkCUDAError("pathtraceFree"); } @@ -128,13 +148,19 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int index = x + (y * cam.resolution.x); PathSegment & segment = pathSegments[index]; + thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0); + thrust::uniform_real_distribution u01(0, 1); + segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); // TODO: implement antialiasing by jittering the ray + float x_jitter = x + u01(rng); + float y_jitter = y + u01(rng); + segment.ray.direction = glm::normalize(cam.view - - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) + - cam.right * cam.pixelLength.x * ((float)x_jitter - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)y_jitter - (float)cam.resolution.y * 0.5f) ); segment.pixelIndex = index; @@ -153,6 +179,7 @@ __global__ void computeIntersections( , Geom * geoms , int geoms_size , ShadeableIntersection * intersections + , int * pathMaterials ) { int 
path_index = blockIdx.x * blockDim.x + threadIdx.x; @@ -201,6 +228,7 @@ __global__ void computeIntersections( if (hit_geom_index == -1) { intersections[path_index].t = -1.0f; + pathMaterials[path_index] = -1; } else { @@ -208,6 +236,8 @@ __global__ void computeIntersections( intersections[path_index].t = t_min; intersections[path_index].materialId = geoms[hit_geom_index].materialid; intersections[path_index].surfaceNormal = normal; + + pathMaterials[path_index] = geoms[hit_geom_index].materialid; } } } @@ -227,11 +257,14 @@ __global__ void shadeFakeMaterial ( , ShadeableIntersection * shadeableIntersections , PathSegment * pathSegments , Material * materials + , bool *flag + , glm::vec3 *image ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num_paths) { + PathSegment &refpath = pathSegments[idx]; ShadeableIntersection intersection = shadeableIntersections[idx]; if (intersection.t > 0.0f) { // if the intersection exists... // Set up the RNG @@ -243,24 +276,36 @@ __global__ void shadeFakeMaterial ( Material material = materials[intersection.materialId]; glm::vec3 materialColor = material.color; + if (refpath.remainingBounces) + { + scatterRay(refpath, intersection.t * refpath.ray.direction + refpath.ray.origin, intersection.surfaceNormal, material, rng); + flag[idx] = true; + } + else + { + flag[idx] = false; + image[refpath.pixelIndex] += refpath.color; + } + // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - pathSegments[idx].color *= (materialColor * material.emittance); - } + //if (material.emittance > 0.0f) { + // pathSegments[idx].color *= (materialColor * material.emittance); + //} // Otherwise, do some pseudo-lighting computation. This is actually more // like what you would expect from shading in a rasterizer like OpenGL. // TODO: replace this! 
you should be able to start with basically a one-liner - else { - float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); - pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; - pathSegments[idx].color *= u01(rng); // apply some noise because why not - } + //else { + // float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); + // pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; + // pathSegments[idx].color *= u01(rng); // apply some noise because why not + //} // If there was no intersection, color the ray black. // Lots of renderers use 4 channel color, RGBA, where A = alpha, often // used for opacity, in which case they can indicate "no opacity". // This can be useful for post-processing and image compositing. } else { - pathSegments[idx].color = glm::vec3(0.0f); + //pathSegments[idx].color = glm::vec3(0.0f); + flag[idx] = false; } } } @@ -277,6 +322,28 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati } } +void compactPath(int& num_paths, PathSegment *paths, bool *flag) +{ + thrust::device_ptr thrust_flags(flag); + thrust::device_ptr thrust_paths(paths); + thrust::remove_if(thrust_paths, thrust_paths + num_paths, thrust_flags, thrust::logical_not()); + num_paths = thrust::count_if(thrust_flags, thrust_flags + num_paths, thrust::identity()); +} + +void sortByMaterials(int num_path) { + thrust::device_ptr thrust_paths(dev_paths); + thrust::device_ptr thrust_intersections(dev_intersections); + thrust::device_ptr thrust_pathMaterials(dev_pathMaterials); + + thrust::device_vector indices(num_path); + thrust::device_vector pvec(thrust_paths, thrust_paths + num_path); + thrust::device_vector ivec(thrust_intersections, thrust_intersections + num_path); + thrust::sequence(indices.begin(), indices.end()); + 
thrust::stable_sort_by_key(thrust_pathMaterials, thrust_pathMaterials + num_path, indices.begin()); + thrust::gather(indices.begin(), indices.end(), pvec.begin(), thrust_paths); + thrust::gather(indices.begin(), indices.end(), ivec.begin(), thrust_intersections); +} + /** * Wrapper for the __global__ call that sets up the kernel calls and does a ton * of memory management @@ -293,7 +360,7 @@ void pathtrace(uchar4 *pbo, int frame, int iter) { (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // 1D block for path tracing - const int blockSize1d = 128; + const int blockSize1d = 512; /////////////////////////////////////////////////////////////////////////// @@ -325,69 +392,101 @@ void pathtrace(uchar4 *pbo, int frame, int iter) { // for you. // TODO: perform one iteration of path tracing - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); - checkCUDAError("generate camera ray"); - + cudaMemset(dev_pathMaterials, 0, pixelcount * sizeof(unsigned)); int depth = 0; PathSegment* dev_path_end = dev_paths + pixelcount; int num_paths = dev_path_end - dev_paths; + //if (iter == 1) { + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + //dim3 numblocksPathSegmentTracing = (pixelcount + blockSize1d - 1) / blockSize1d; + //computeIntersections << > > ( + // depth + // , num_paths + // , dev_paths + // , dev_geoms + // , hst_scene->geoms.size() + // , dev_intersections + // , dev_pathMaterials + // ); + + //cudaMemcpy(dev_cachePaths, dev_paths, pixelcount * sizeof(PathSegment), cudaMemcpyDeviceToDevice); + //cudaMemcpy(dev_cacheIntersections, dev_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + + checkCUDAError("generate camera ray"); + //} + //else + //{ + // cudaMemcpy(dev_paths, dev_cachePaths, pixelcount * sizeof(PathSegment), cudaMemcpyDeviceToDevice); + // cudaMemcpy(dev_intersections, dev_cacheIntersections, pixelcount 
* sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + //} + + // --- PathSegment Tracing Stage --- // Shoot ray into scene, bounce between objects, push shading chunks - bool iterationComplete = false; + bool iterationComplete = false; while (!iterationComplete) { - - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + computeIntersections <<>> ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + , dev_pathMaterials + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + depth++; + + + // TODO: + // --- Shading Stage --- + // Shade path segments based on intersections and generate new rays by + // evaluating the BSDF. + // Start off with just a big kernel that handles all the different + // materials you have in the scenefile. + // TODO: compare between directly shading the path segments and shading + // path segments that have been reshuffled to be contiguous in memory. + + //sortByMaterials(num_paths); + + shadeFakeMaterial<<>> ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials, + dev_flag, + dev_image ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); - depth++; - - - // TODO: - // --- Shading Stage --- - // Shade path segments based on intersections and generate new rays by - // evaluating the BSDF. - // Start off with just a big kernel that handles all the different - // materials you have in the scenefile. 
- // TODO: compare between directly shading the path segments and shading - // path segments that have been reshuffled to be contiguous in memory. - - shadeFakeMaterial<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = true; // TODO: should be based off stream compaction results. + + compactPath(num_paths, dev_paths, dev_flag); + + if (!num_paths) iterationComplete = true; + //iterationComplete = true; // TODO: should be based off stream compaction results. } - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather<<>>(num_paths, dev_image, dev_paths); - /////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////// - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); + // Send results to OpenGL buffer for rendering + sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - checkCUDAError("pathtrace"); + checkCUDAError("pathtrace"); } diff --git a/src/preview.cpp b/src/preview.cpp index 4eb0bc1..0ae7aae 100644 --- a/src/preview.cpp +++ b/src/preview.cpp @@ -173,7 +173,8 @@ void mainLoop() { glfwPollEvents(); runCuda(); - string title = "CIS565 Path Tracer | " + utilityCore::convertIntToString(iteration) + " Iterations"; + string title = "CIS565 Path Tracer | " + utilityCore::convertIntToString(iteration) + " Iterations " + + utilityCore::convertIntToString(iterationTimeMs) + " ms " 
+ utilityCore::convertIntToString(totalTimeMs) + " total ms"; glfwSetWindowTitle(window, title.c_str()); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);