diff --git a/README.md b/README.md index d2fa33d..fd30353 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,101 @@ Project 0 Getting Started **University of Pennsylvania, CIS 5650: GPU Programming and Architecture, Project 0** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Thomas Shaw + * [LinkedIn](https://www.linkedin.com/in/thomas-shaw-54468b222), [personal website](https://tlshaw.me), [GitHub](https://github.com/printer83mph), etc. +* Tested on: Fedora 42, Ryzen 7 5700x @ 4.67GHz, 32GB, RTX 2070 8GB -### (TODO: Your README) +### Results -Include screenshots, analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +Configuring my machine to work with CUDA was a bit frustrating, as I previously had an installation of Fedora 40 with proprietary non-CUDA drivers. The process to both A) upgrade to 42, which I had wanted to do for a while, and B) install CUDA toolkit + drivers, was a bit overcomplicated, so after fiddling for a couple days, I went with a clean slate (fresh install) and things worked out much nicer. + +I used NVIDIA's "Package Manager Install" guide, which required some extra steps since I had Secure Boot enabled, but otherwise was straightforward. + +I'm primarily using Visual Studio Code as my code editing environment, with `clangd` as a language server + linter + formatter. With some minimal configuration, this works nicely. + +``` +# .clangd + +CompileFlags: + Add: + - -std=c++11 + - --cuda-path=/usr/local/cuda + - --cuda-gpu-arch=sm_75 + - -L/usr/local/cuda/lib64 + - -I/usr/local/cuda/include + Remove: + - "-forward-unknown-to-host-compiler" + - "-arch=native" + - "--expt-*" + - "--options-file" + - "-G" +``` + +I combined this with the following build configuration: + +```sh +CC=gcc CXX=g++ CUDACXX=nvcc cmake .. -G "Ninja" \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_CUDA_FLAGS="-g -G" \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +``` + +One of the main issues I ran into after a fresh install was that Wayland (compositor) didn't like the built binary. I wasn't sure exactly why without digging further into GTK, but the following error would appear: + +``` +$ cuda-gl-check/build/bin/cuda-gl-check +libdecor-gtk-WARNING: Failed to initialize GTK +Failed to load plugin 'libdecor-gtk.so': failed to init +``` + +This was resolved by setting the following environment variables when running the binary, whether debugging or just running raw: + +```sh +export WAYLAND_DISPLAY="" +export XDG_SESSION_TYPE=x11 +``` + +I also plugged these into `launch.json`: + +```json +{ + "$schema": "vscode://schemas/launch", + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "environment": [ + {"name": "WAYLAND_DISPLAY", "value": ""}, + {"name": "XDG_SESSION_TYPE", "value": "x11"} + ], + "program": "${workspaceFolder}/build/bin/cuda-gl-check" + }, + { + "name": "CUDA C++: Attach", + "type": "cuda-gdb", + "request": "attach" + } + ] +} +``` + +### Screencaps + +![gl check window](images/gl-check-screencap.png) + +![nsight gui](images/nsight-gui.png) + +![nsight summary](images/nsight-summary.png) + +![nsight analysis](images/nsight-analysis.png) + +![webgl report](images/webgl-report.png) + +For WebGPU, I had to enable the "Unsafe WebGPU Support" flag in Chrome. + +![webGPU report](images/webgpu-report.png) \ No newline at end of file diff --git a/cuda-gl-check/.clangd b/cuda-gl-check/.clangd new file mode 100644 index 0000000..97cf6e6 --- /dev/null +++ b/cuda-gl-check/.clangd @@ -0,0 +1,13 @@ +CompileFlags: + Add: + - -std=c++11 + - --cuda-path=/usr/local/cuda + - --cuda-gpu-arch=sm_75 + - -L/usr/local/cuda/lib64 + - -I/usr/local/cuda/include + Remove: + - "-forward-unknown-to-host-compiler" + - "-arch=native" + - "--expt-*" + - "--options-file" + - "-G" diff --git a/cuda-gl-check/.gitignore b/cuda-gl-check/.gitignore index 92a50c5..1cd8718 100644 --- a/cuda-gl-check/.gitignore +++ b/cuda-gl-check/.gitignore @@ -215,6 +215,10 @@ install_manifest.txt *.app +### Clangd ### +.cache/clangd + + ### CUDA ### *.i *.ii diff --git a/cuda-gl-check/.vscode/launch.json b/cuda-gl-check/.vscode/launch.json new file mode 100644 index 0000000..5891e54 --- /dev/null +++ b/cuda-gl-check/.vscode/launch.json @@ -0,0 +1,24 @@ +{ + "$schema": "vscode://schemas/launch", + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "environment": [ + {"name": "WAYLAND_DISPLAY", "value": ""}, + {"name": "XDG_SESSION_TYPE", "value": "x11"} + ], + "program": "${workspaceFolder}/build/bin/cuda-gl-check" + }, + { + "name": "CUDA C++: Attach", + "type": "cuda-gdb", + "request": "attach" + } + ] +} \ No newline at end of file diff --git a/cuda-gl-check/.vscode/settings.json b/cuda-gl-check/.vscode/settings.json new file mode 100644 index 0000000..9276bfd --- /dev/null +++ b/cuda-gl-check/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "files.associations": { + "*.cu": "cuda-cpp" + }, + "[cpp]": { + "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd" + }, + "[cuda-cpp]": { + "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd" + } +} \ No newline at end of file diff --git a/cuda-gl-check/src/glslUtility.cpp b/cuda-gl-check/src/glslUtility.cpp index daa69e9..83054fb 100644 --- a/cuda-gl-check/src/glslUtility.cpp +++ b/cuda-gl-check/src/glslUtility.cpp @@ -7,27 +7,27 @@ */ #define _CRT_SECURE_NO_WARNINGS -#include -#include -#include +#include "glslUtility.hpp" #include #include -#include "glslUtility.hpp" +#include +#include +#include using std::ios; namespace glslUtility { -// embedded passthrough shaders so that default passthrough shaders don't need to be loaded -static std::string passthroughVS = - "attribute vec4 Position;" - "attribute vec2 Texcoords;" - "varying vec2 v_Texcoords;" - "" - "void main(void){" - " v_Texcoords = Texcoords;" - " gl_Position = Position;" - "}"; +// embedded passthrough shaders so that default passthrough shaders don't need +// to be loaded +static std::string passthroughVS = "attribute vec4 Position;" + "attribute vec2 Texcoords;" + "varying vec2 v_Texcoords;" + "" + "void main(void){" + " v_Texcoords = Texcoords;" + " gl_Position = Position;" + "}"; static std::string passthroughFS = "varying vec2 v_Texcoords;" "" @@ -38,202 +38,208 @@ static std::string passthroughFS = "}"; typedef struct { - GLuint vertex; - GLuint fragment; + GLuint vertex; + GLuint fragment; } shaders_t; -char* loadFile(const char *fname, GLint &fSize) { - // file read based on example in cplusplus.com tutorial - std::ifstream file (fname, ios::in | ios::binary | ios::ate); - if (file.is_open()) { - unsigned int size = (unsigned int)file.tellg(); - fSize = size; - char *memblock = new char [size]; - file.seekg (0, ios::beg); - file.read (memblock, size); - file.close(); - //std::cout << "file " << fname << " loaded" << std::endl; - return memblock; - } - - std::cout << "Unable to open file " << fname << std::endl; - exit(EXIT_FAILURE); +char *loadFile(const char *fname, GLint &fSize) { + // file read based on example in cplusplus.com tutorial + std::ifstream file(fname, ios::in | ios::binary | ios::ate); + if (file.is_open()) { + unsigned int size = (unsigned int)file.tellg(); + fSize = size; + char *memblock = new char[size]; + file.seekg(0, ios::beg); + file.read(memblock, size); + file.close(); + // std::cout << "file " << fname << " loaded" << std::endl; + return memblock; + } + + std::cout << "Unable to open file " << fname << std::endl; + exit(EXIT_FAILURE); } // printShaderInfoLog // From OpenGL Shading Language 3rd Edition, p215-216 // Display (hopefully) useful error messages if shader fails to compile void printShaderInfoLog(GLint shader) { - int infoLogLen = 0; - int charsWritten = 0; - GLchar *infoLog; - - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLogLen); - - if (infoLogLen > 1) { - infoLog = new GLchar[infoLogLen]; - // error check for fail to allocate memory omitted - glGetShaderInfoLog(shader, infoLogLen, &charsWritten, infoLog); - //std::cout << "InfoLog:" << std::endl << infoLog << std::endl; - delete [] infoLog; - } + int infoLogLen = 0; + int charsWritten = 0; + GLchar *infoLog; + + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLogLen); + + if (infoLogLen > 1) { + infoLog = new GLchar[infoLogLen]; + // error check for fail to allocate memory omitted + glGetShaderInfoLog(shader, infoLogLen, &charsWritten, infoLog); + // std::cout << "InfoLog:" << std::endl << infoLog << std::endl; + delete[] infoLog; + } } void printLinkInfoLog(GLint prog) { - int infoLogLen = 0; - int charsWritten = 0; - GLchar *infoLog; - - glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &infoLogLen); - - if (infoLogLen > 1) { - infoLog = new GLchar[infoLogLen]; - // error check for fail to allocate memory omitted - glGetProgramInfoLog(prog, infoLogLen, &charsWritten, infoLog); - //std::cout << "InfoLog:" << std::endl << infoLog << std::endl; - delete [] infoLog; - } + int infoLogLen = 0; + int charsWritten = 0; + GLchar *infoLog; + + glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &infoLogLen); + + if (infoLogLen > 1) { + infoLog = new GLchar[infoLogLen]; + // error check for fail to allocate memory omitted + glGetProgramInfoLog(prog, infoLogLen, &charsWritten, infoLog); + // std::cout << "InfoLog:" << std::endl << infoLog << std::endl; + delete[] infoLog; + } } shaders_t loadDefaultShaders() { - GLuint f, v; + GLuint f, v; - char *vs, *fs; + char *vs, *fs; - v = glCreateShader(GL_VERTEX_SHADER); - f = glCreateShader(GL_FRAGMENT_SHADER); + v = glCreateShader(GL_VERTEX_SHADER); + f = glCreateShader(GL_FRAGMENT_SHADER); - // load shaders & get length of each - GLint vlen; - GLint flen; + // load shaders & get length of each + GLint vlen; + GLint flen; - vlen = (unsigned int)std::strlen(passthroughVS.c_str()); - flen = (unsigned int)std::strlen(passthroughFS.c_str()); + vlen = (unsigned int)std::strlen(passthroughVS.c_str()); + flen = (unsigned int)std::strlen(passthroughFS.c_str()); - vs = new char[passthroughVS.length() + 1]; - fs = new char[passthroughFS.length() + 1]; + vs = new char[passthroughVS.length() + 1]; + fs = new char[passthroughFS.length() + 1]; - std::strcpy(vs, passthroughVS.c_str()); - std::strcpy(fs, passthroughFS.c_str()); + std::strcpy(vs, passthroughVS.c_str()); + std::strcpy(fs, passthroughFS.c_str()); - const char * vv = vs; - const char * ff = fs; + const char *vv = vs; + const char *ff = fs; - glShaderSource(v, 1, &vv, &vlen); - glShaderSource(f, 1, &ff, &flen); + glShaderSource(v, 1, &vv, &vlen); + glShaderSource(f, 1, &ff, &flen); - GLint compiled; + GLint compiled; - glCompileShader(v); - glGetShaderiv(v, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - std::cout << "Vertex shader not compiled." << std::endl; - } - printShaderInfoLog(v); + glCompileShader(v); + glGetShaderiv(v, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + std::cout << "Vertex shader not compiled." << std::endl; + } + printShaderInfoLog(v); - glCompileShader(f); - glGetShaderiv(f, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - std::cout << "Fragment shader not compiled." << std::endl; - } - printShaderInfoLog(f); + glCompileShader(f); + glGetShaderiv(f, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + std::cout << "Fragment shader not compiled." << std::endl; + } + printShaderInfoLog(f); - shaders_t out; - out.vertex = v; - out.fragment = f; + shaders_t out; + out.vertex = v; + out.fragment = f; - delete [] vs; // dont forget to free allocated memory, or else really bad things start happening - delete [] fs; // we allocated this in the loadFile function... + delete[] vs; // dont forget to free allocated memory, or else really bad + // things start happening + delete[] fs; // we allocated this in the loadFile function... - return out; + return out; } -shaders_t loadShaders(const char * vert_path, const char * frag_path) { - GLuint f, v; +shaders_t loadShaders(const char *vert_path, const char *frag_path) { + GLuint f, v; - char *vs, *fs; + char *vs, *fs; - v = glCreateShader(GL_VERTEX_SHADER); - f = glCreateShader(GL_FRAGMENT_SHADER); + v = glCreateShader(GL_VERTEX_SHADER); + f = glCreateShader(GL_FRAGMENT_SHADER); - // load shaders & get length of each - GLint vlen; - GLint flen; + // load shaders & get length of each + GLint vlen; + GLint flen; - vs = loadFile(vert_path, vlen); - fs = loadFile(frag_path, flen); + vs = loadFile(vert_path, vlen); + fs = loadFile(frag_path, flen); - const char * vv = vs; - const char * ff = fs; + const char *vv = vs; + const char *ff = fs; - glShaderSource(v, 1, &vv, &vlen); - glShaderSource(f, 1, &ff, &flen); + glShaderSource(v, 1, &vv, &vlen); + glShaderSource(f, 1, &ff, &flen); - GLint compiled; + GLint compiled; - glCompileShader(v); - glGetShaderiv(v, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - std::cout << "Vertex shader not compiled." << std::endl; - } - printShaderInfoLog(v); + glCompileShader(v); + glGetShaderiv(v, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + std::cout << "Vertex shader not compiled." << std::endl; + } + printShaderInfoLog(v); - glCompileShader(f); - glGetShaderiv(f, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - std::cout << "Fragment shader not compiled." << std::endl; - } - printShaderInfoLog(f); + glCompileShader(f); + glGetShaderiv(f, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + std::cout << "Fragment shader not compiled." << std::endl; + } + printShaderInfoLog(f); - shaders_t out; - out.vertex = v; - out.fragment = f; + shaders_t out; + out.vertex = v; + out.fragment = f; - delete [] vs; // dont forget to free allocated memory, or else really bad things start happening - delete [] fs; // we allocated this in the loadFile function... + delete[] vs; // dont forget to free allocated memory, or else really bad + // things start happening + delete[] fs; // we allocated this in the loadFile function... - return out; + return out; } -void attachAndLinkProgram( GLuint program, shaders_t shaders) { - glAttachShader(program, shaders.vertex); - glAttachShader(program, shaders.fragment); - - glLinkProgram(program); - GLint linked; - glGetProgramiv(program, GL_LINK_STATUS, &linked); - if (!linked) { - std::cout << "Program did not link." << std::endl; - } - printLinkInfoLog(program); +void attachAndLinkProgram(GLuint program, shaders_t shaders) { + glAttachShader(program, shaders.vertex); + glAttachShader(program, shaders.fragment); + + glLinkProgram(program); + GLint linked; + glGetProgramiv(program, GL_LINK_STATUS, &linked); + if (!linked) { + std::cout << "Program did not link." << std::endl; + } + printLinkInfoLog(program); } -GLuint createDefaultProgram(const char *attributeLocations[], GLuint numberOfLocations) { - glslUtility::shaders_t shaders = glslUtility::loadDefaultShaders(); +GLuint createDefaultProgram(const char *attributeLocations[], + GLuint numberOfLocations) { + glslUtility::shaders_t shaders = glslUtility::loadDefaultShaders(); - GLuint program = glCreateProgram(); + GLuint program = glCreateProgram(); - for (GLuint i = 0; i < numberOfLocations; ++i) { - glBindAttribLocation(program, i, attributeLocations[i]); - } + for (GLuint i = 0; i < numberOfLocations; ++i) { + glBindAttribLocation(program, i, attributeLocations[i]); + } - glslUtility::attachAndLinkProgram(program, shaders); + glslUtility::attachAndLinkProgram(program, shaders); - return program; + return program; } -GLuint createProgram(const char *vertexShaderPath, const char *fragmentShaderPath, - const char *attributeLocations[], GLuint numberOfLocations) { - glslUtility::shaders_t shaders = glslUtility::loadShaders(vertexShaderPath, fragmentShaderPath); +GLuint createProgram(const char *vertexShaderPath, + const char *fragmentShaderPath, + const char *attributeLocations[], + GLuint numberOfLocations) { + glslUtility::shaders_t shaders = + glslUtility::loadShaders(vertexShaderPath, fragmentShaderPath); - GLuint program = glCreateProgram(); + GLuint program = glCreateProgram(); - for (GLuint i = 0; i < numberOfLocations; ++i) { - glBindAttribLocation(program, i, attributeLocations[i]); - } + for (GLuint i = 0; i < numberOfLocations; ++i) { + glBindAttribLocation(program, i, attributeLocations[i]); + } - glslUtility::attachAndLinkProgram(program, shaders); + glslUtility::attachAndLinkProgram(program, shaders); - return program; -} + return program; } +} // namespace glslUtility diff --git a/cuda-gl-check/src/glslUtility.hpp b/cuda-gl-check/src/glslUtility.hpp index cfce352..02ac34b 100644 --- a/cuda-gl-check/src/glslUtility.hpp +++ b/cuda-gl-check/src/glslUtility.hpp @@ -11,8 +11,10 @@ #include namespace glslUtility { - GLuint createDefaultProgram(const char *attributeLocations[], GLuint numberOfLocations); - GLuint createProgram(const char *vertexShaderPath, const char - *fragmentShaderPath, const char *attributeLocations[], GLuint - numberOfLocations); -} +GLuint createDefaultProgram(const char *attributeLocations[], + GLuint numberOfLocations); +GLuint createProgram(const char *vertexShaderPath, + const char *fragmentShaderPath, + const char *attributeLocations[], + GLuint numberOfLocations); +} // namespace glslUtility diff --git a/cuda-gl-check/src/kernel.cu b/cuda-gl-check/src/kernel.cu index 3fbd9b0..e527457 100644 --- a/cuda-gl-check/src/kernel.cu +++ b/cuda-gl-check/src/kernel.cu @@ -7,17 +7,17 @@ * @copyright University of Pennsylvania */ +#include "kernel.h" #include #include #include -#include "kernel.h" void checkCUDAError(const char *msg) { - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } } /** @@ -37,72 +37,116 @@ void checkCUDAError(const char *msg) { * 12 -> Magenta #ff0080 rgb(255, 0, 128) * * -> Black #000000 rgb( 0, 0, 0) */ -__host__ __device__ void versionToColor(uchar4* pixel, int version) { - switch(version) { - case 0: - pixel->x = 255; pixel->y = 255; pixel->z = 255; break; - case 1: - pixel->x = 255; pixel->y = 0; pixel->z = 0; break; - case 2: - pixel->x = 255; pixel->y = 128; pixel->z = 0; break; - case 3: - pixel->x = 255; pixel->y = 255; pixel->z = 0; break; - case 4: - pixel->x = 128; pixel->y = 255; pixel->z = 0; break; - case 5: - pixel->x = 0; pixel->y = 255; pixel->z = 0; break; - case 6: - pixel->x = 0; pixel->y = 255; pixel->z = 128; break; - case 7: - pixel->x = 0; pixel->y = 255; pixel->z = 255; break; - case 8: - pixel->x = 0; pixel->y = 128; pixel->z = 255; break; - case 9: - pixel->x = 0; pixel->y = 0; pixel->z = 255; break; - case 10: - pixel->x = 128; pixel->y = 0; pixel->z = 255; break; - case 11: - pixel->x = 255; pixel->y = 0; pixel->z = 255; break; - case 12: - pixel->x = 255; pixel->y = 0; pixel->z = 128; break; - default: - pixel->x = 0; pixel->y = 0; pixel->z = 0; - } +__host__ __device__ void versionToColor(uchar4 *pixel, int version) { + switch (version) { + case 0: + pixel->x = 255; + pixel->y = 255; + pixel->z = 255; + break; + case 1: + pixel->x = 255; + pixel->y = 0; + pixel->z = 0; + break; + case 2: + pixel->x = 255; + pixel->y = 128; + pixel->z = 0; + break; + case 3: + pixel->x = 255; + pixel->y = 255; + pixel->z = 0; + break; + case 4: + pixel->x = 128; + pixel->y = 255; + pixel->z = 0; + break; + case 5: + pixel->x = 0; + pixel->y = 255; + pixel->z = 0; + break; + case 6: + pixel->x = 0; + pixel->y = 255; + pixel->z = 128; + break; + case 7: + pixel->x = 0; + pixel->y = 255; + pixel->z = 255; + break; + case 8: + pixel->x = 0; + pixel->y = 128; + pixel->z = 255; + break; + case 9: + pixel->x = 0; + pixel->y = 0; + pixel->z = 255; + break; + case 10: + pixel->x = 128; + pixel->y = 0; + pixel->z = 255; + break; + case 11: + pixel->x = 255; + pixel->y = 0; + pixel->z = 255; + break; + case 12: + pixel->x = 255; + pixel->y = 0; + pixel->z = 128; + break; + default: + pixel->x = 0; + pixel->y = 0; + pixel->z = 0; + } } // Kernel that writes the image to the OpenGL PBO directly. -__global__ void createVersionVisualization(uchar4* PBOpos, int width, int height, int major, int minor) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * width); +__global__ void createVersionVisualization(uchar4 *PBOpos, int width, + int height, int major, int minor) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * width); - if (x <= width && y <= height) { - // Each thread writes one pixel location in the texture (textel) - PBOpos[index].w = 0; - PBOpos[index].x = 0; - PBOpos[index].y = 0; - PBOpos[index].z = 0; + if (x <= width && y <= height) { + // Each thread writes one pixel location in the texture (textel) + PBOpos[index].w = 0; + PBOpos[index].x = 0; + PBOpos[index].y = 0; + PBOpos[index].z = 0; - int ver = y < height / 2 ? major : minor; - versionToColor(&PBOpos[index], ver); - } + int ver = y < height / 2 ? major : minor; + versionToColor(&PBOpos[index], ver); + } } // Wrapper for the __global__ call that sets up the kernel calls -void kernelVersionVis(uchar4* PBOpos, int width, int height, int major, int minor) { - // set up crucial magic - unsigned int blockSize = 16; - dim3 threadsPerBlock(blockSize, blockSize); +void kernelVersionVis(uchar4 *PBOpos, int width, int height, int major, + int minor) { + // set up crucial magic + unsigned int blockSize = 16; + dim3 threadsPerBlock(blockSize, blockSize); - unsigned int blocksX = (width + blockSize - 1) / blockSize; - unsigned int blocksY = (height + blockSize - 1) / blockSize; - dim3 fullBlocksPerGrid(blocksX, blocksY); + unsigned int blocksX = (width + blockSize - 1) / blockSize; + unsigned int blocksY = (height + blockSize - 1) / blockSize; + dim3 fullBlocksPerGrid(blocksX, blocksY); - //kernel launches - createVersionVisualization<<>>(PBOpos, width, height, major, minor); + // kernel launches + createVersionVisualization<<>>( + PBOpos, width, height, major, minor); - // make certain the kernel has completed - cudaDeviceSynchronize(); + // make certain the kernel has completed + cudaDeviceSynchronize(); - checkCUDAError("Kernel failed!"); + checkCUDAError("Kernel failed!"); } diff --git a/cuda-gl-check/src/kernel.h b/cuda-gl-check/src/kernel.h index b454162..1ea80f7 100644 --- a/cuda-gl-check/src/kernel.h +++ b/cuda-gl-check/src/kernel.h @@ -3,4 +3,4 @@ #include #include -void kernelVersionVis(uchar4* pos, int width, int height, int major, int minor); +void kernelVersionVis(uchar4 *pos, int width, int height, int major, int minor); diff --git a/cuda-gl-check/src/main.cpp b/cuda-gl-check/src/main.cpp index 886fd4c..cd81f40 100644 --- a/cuda-gl-check/src/main.cpp +++ b/cuda-gl-check/src/main.cpp @@ -1,161 +1,160 @@ -#include -#include +#include "main.hpp" +#include "kernel.h" #include #include -#include +#include #include -#include "main.hpp" +#include +#include +#include /** * C main function. */ -int main(int argc, char* argv[]) { - // TODO: Change this line to use your name! - m_yourName = "TODO: YOUR NAME HERE"; +int main(int argc, char *argv[]) { + m_yourName = "Thomas Shaw"; - if (init(argc, argv)) { - mainLoop(); - } + if (init(argc, argv)) { + mainLoop(); + } - return 0; + return 0; } /** * Initialization of CUDA and GLFW. */ bool init(int argc, char **argv) { - // Set window title to "Student Name: [SM 2.0] GPU Name" - std::string deviceName; - cudaDeviceProp deviceProp; - int gpuDevice = 0; - int device_count = 0; - cudaGetDeviceCount(&device_count); - if (gpuDevice > device_count) { - std::cout << "Error: GPU device number is greater than the number of devices!" << - "Perhaps a CUDA-capable GPU is not installed?" << std::endl; - return false; - } - cudaGetDeviceProperties(&deviceProp, gpuDevice); - m_major = deviceProp.major; - m_minor = deviceProp.minor; - - std::ostringstream ss; - ss << m_yourName << ": [SM " << m_major << "." << m_minor << "] " << deviceProp.name; - deviceName = ss.str(); - - // Window setup stuff - glfwSetErrorCallback(errorCallback); - - if (!glfwInit()) { - return false; - } - m_width = 800; - m_height = 800; - m_window = glfwCreateWindow(m_width, m_height, deviceName.c_str(), NULL, NULL); - if (!m_window) { - glfwTerminate(); - return false; - } - glfwMakeContextCurrent(m_window); - glfwSetKeyCallback(m_window, keyCallback); - - glewExperimental = GL_TRUE; - if (glewInit() != GLEW_OK) { - return false; - } - - // init all of the things - initVAO(); - initTextures(); - initCUDA(); - initPBO(&m_pbo); - - GLuint passthroughProgram; - passthroughProgram = initShader(); - glUseProgram(passthroughProgram); - glActiveTexture(GL_TEXTURE0); - - return true; + // Set window title to "Student Name: [SM 2.0] GPU Name" + std::string deviceName; + cudaDeviceProp deviceProp; + int gpuDevice = 0; + int device_count = 0; + cudaGetDeviceCount(&device_count); + if (gpuDevice > device_count) { + std::cout + << "Error: GPU device number is greater than the number of devices!" + << "Perhaps a CUDA-capable GPU is not installed?" << std::endl; + return false; + } + cudaGetDeviceProperties(&deviceProp, gpuDevice); + m_major = deviceProp.major; + m_minor = deviceProp.minor; + + std::ostringstream ss; + ss << m_yourName << ": [SM " << m_major << "." << m_minor << "] " + << deviceProp.name; + deviceName = ss.str(); + + // Window setup stuff + glfwSetErrorCallback(errorCallback); + + if (!glfwInit()) { + return false; + } + m_width = 800; + m_height = 800; + m_window = + glfwCreateWindow(m_width, m_height, deviceName.c_str(), NULL, NULL); + if (!m_window) { + glfwTerminate(); + return false; + } + glfwMakeContextCurrent(m_window); + glfwSetKeyCallback(m_window, keyCallback); + + glewExperimental = GL_TRUE; + if (glewInit() != GLEW_OK) { + return false; + } + + // init all of the things + initVAO(); + initTextures(); + initCUDA(); + initPBO(&m_pbo); + + GLuint passthroughProgram; + passthroughProgram = initShader(); + glUseProgram(passthroughProgram); + glActiveTexture(GL_TEXTURE0); + + return true; } void initPBO(GLuint *pbo) { - if (pbo) { - // set up vertex data parameter - int num_texels = m_width * m_height; - int num_values = num_texels * 4; - size_t size_tex_data = sizeof(GLubyte) * num_values; - - // Generate a buffer ID called a PBO (Pixel Buffer Object) - glGenBuffers(1, pbo); - // Make this the current UNPACK buffer (OpenGL is state-based) - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); - // Allocate data for the buffer. 4-channel 8-bit image - glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); - cudaGLRegisterBufferObject(*pbo); - } + if (pbo) { + // set up vertex data parameter + int num_texels = m_width * m_height; + int num_values = num_texels * 4; + size_t size_tex_data = sizeof(GLubyte) * num_values; + + // Generate a buffer ID called a PBO (Pixel Buffer Object) + glGenBuffers(1, pbo); + // Make this the current UNPACK buffer (OpenGL is state-based) + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); + // Allocate data for the buffer. 4-channel 8-bit image + glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); + cudaGLRegisterBufferObject(*pbo); + } } void initVAO() { - GLfloat vertices[] = { - -1.0f, -1.0f, - 1.0f, -1.0f, - 1.0f, 1.0f, - -1.0f, 1.0f, - }; - - GLfloat texCoords[] = { - 1.0f, 1.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 0.0f - }; - - GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; - - GLuint vertexBufferObjID[3]; - glGenBuffers(3, vertexBufferObjID); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)m_positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(m_positionLocation); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); - glBufferData(GL_ARRAY_BUFFER, sizeof(texCoords), texCoords, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)m_texCoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(m_texCoordsLocation); - - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); + GLfloat vertices[] = { + -1.0f, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, + }; + + GLfloat texCoords[] = {1.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f}; + + GLushort indices[] = {0, 1, 3, 3, 1, 2}; + + GLuint vertexBufferObjID[3]; + glGenBuffers(3, vertexBufferObjID); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)m_positionLocation, 2, GL_FLOAT, GL_FALSE, 0, + 0); + glEnableVertexAttribArray(m_positionLocation); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); + glBufferData(GL_ARRAY_BUFFER, sizeof(texCoords), texCoords, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)m_texCoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, + 0); + glEnableVertexAttribArray(m_texCoordsLocation); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, + GL_STATIC_DRAW); } void initCUDA() { - // Default to device ID 0. If you have more than one GPU and want to test a non-default one, - // change the device ID. - cudaGLSetGLDevice(0); + // Default to device ID 0. If you have more than one GPU and want to test a + // non-default one, change the device ID. + cudaGLSetGLDevice(0); - // Clean up on program exit - atexit(cleanupCUDA); + // Clean up on program exit + atexit(cleanupCUDA); } void initTextures() { - glGenTextures(1, &m_image); - glBindTexture(GL_TEXTURE_2D, m_image); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, m_width, m_height, 0, GL_BGRA, - GL_UNSIGNED_BYTE, NULL); + glGenTextures(1, &m_image); + glBindTexture(GL_TEXTURE_2D, m_image); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, m_width, m_height, 0, GL_BGRA, + GL_UNSIGNED_BYTE, NULL); } GLuint initShader() { - const char *attributeLocations[] = { "Position", "Tex" }; - GLuint program = glslUtility::createDefaultProgram(attributeLocations, 2); - GLint location; - glUseProgram(program); - if ((location = glGetUniformLocation(program, "u_image")) != -1) { - glUniform1i(location, 0); - } - return program; + const char *attributeLocations[] = {"Position", "Tex"}; + GLuint program = glslUtility::createDefaultProgram(attributeLocations, 2); + GLint location; + glUseProgram(program); + if ((location = glGetUniformLocation(program, "u_image")) != -1) { + glUniform1i(location, 0); + } + return program; } // ==================================== @@ -163,46 +162,47 @@ GLuint initShader() { // ==================================== void runCUDA() { - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer - uchar4 *dptr = NULL; - cudaGLMapBufferObject((void**)&dptr, m_pbo); + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use + // this buffer + uchar4 *dptr = NULL; + cudaGLMapBufferObject((void **)&dptr, m_pbo); - // Execute the kernel - kernelVersionVis(dptr, m_width, m_height, m_major, m_minor); + // Execute the kernel + kernelVersionVis(dptr, m_width, m_height, m_major, m_minor); - // Unmap buffer object - cudaGLUnmapBufferObject(m_pbo); + // Unmap buffer object + cudaGLUnmapBufferObject(m_pbo); } void mainLoop() { - while (!glfwWindowShouldClose(m_window)) { - glfwPollEvents(); - runCUDA(); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo); - glBindTexture(GL_TEXTURE_2D, m_image); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_width, m_height, GL_RGBA, - GL_UNSIGNED_BYTE, NULL); - glClear(GL_COLOR_BUFFER_BIT); - - // VAO, shader program, and texture already bound - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - glfwSwapBuffers(m_window); - } - glfwDestroyWindow(m_window); - glfwTerminate(); -} + while (!glfwWindowShouldClose(m_window)) { + glfwPollEvents(); + runCUDA(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo); + glBindTexture(GL_TEXTURE_2D, m_image); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_width, m_height, GL_RGBA, + GL_UNSIGNED_BYTE, NULL); + glClear(GL_COLOR_BUFFER_BIT); + + // VAO, shader program, and texture already bound + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + glfwSwapBuffers(m_window); + } + glfwDestroyWindow(m_window); + glfwTerminate(); +} void errorCallback(int error, const char *description) { - fprintf(stderr, "error %d: %s\n", error, description); + fprintf(stderr, "error %d: %s\n", error, description); } -void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods) { - if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) { - glfwSetWindowShouldClose(window, GL_TRUE); - } +void keyCallback(GLFWwindow *window, int key, int scancode, int action, + int mods) { + if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) { + glfwSetWindowShouldClose(window, GL_TRUE); + } } // ==================================== @@ -210,27 +210,27 @@ void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods // ==================================== void cleanupCUDA() { - if (m_pbo) { - deletePBO(&m_pbo); - } - if (m_image) { - deleteTexture(&m_image); - } + if (m_pbo) { + deletePBO(&m_pbo); + } + if (m_image) { + deleteTexture(&m_image); + } } void deletePBO(GLuint *pbo) { - if (pbo) { - // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(*pbo); + if (pbo) { + // unregister this buffer object with CUDA + cudaGLUnregisterBufferObject(*pbo); - glBindBuffer(GL_ARRAY_BUFFER, *pbo); - glDeleteBuffers(1, pbo); + glBindBuffer(GL_ARRAY_BUFFER, *pbo); + glDeleteBuffers(1, pbo); - *pbo = (GLuint)NULL; - } + *pbo = (GLuint)NULL; + } } void deleteTexture(GLuint *tex) { - glDeleteTextures(1, tex); - *tex = (GLuint)NULL; + glDeleteTextures(1, tex); + *tex = (GLuint)NULL; } diff --git a/cuda-gl-check/src/main.hpp b/cuda-gl-check/src/main.hpp index 389eaa7..021d077 100644 --- a/cuda-gl-check/src/main.hpp +++ b/cuda-gl-check/src/main.hpp @@ -1,39 +1,38 @@ #pragma once -#include -#include -#include +#include "glslUtility.hpp" #include #include -#include "glslUtility.hpp" -#include "kernel.h" +#include +#include // ==================================== // GL stuff // ==================================== -GLuint m_pbo = (GLuint) NULL; -GLFWwindow* m_window; -std::string m_yourName; -unsigned int m_width; -unsigned int m_height; -int m_major; -int m_minor; -GLuint m_positionLocation = 0; -GLuint m_texCoordsLocation = 1; -GLuint m_image; +GLuint m_pbo = (GLuint)NULL; +GLFWwindow *m_window; +std::string m_yourName; +unsigned int m_width; +unsigned int m_height; +int m_major; +int m_minor; +GLuint m_positionLocation = 0; +GLuint m_texCoordsLocation = 1; +GLuint m_image; // ==================================== // Main // ==================================== -int main(int argc, char* argv[]); +int main(int argc, char *argv[]); // ==================================== // Main loop // ==================================== void mainLoop(); void errorCallback(int error, const char *description); -void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods); +void keyCallback(GLFWwindow *window, int key, int scancode, int action, + int mods); void runCUDA(); // ==================================== diff --git a/images/gl-check-screencap.png b/images/gl-check-screencap.png new file mode 100644 index 0000000..cc8c056 Binary files /dev/null and b/images/gl-check-screencap.png differ diff --git a/images/nsight-analysis.png b/images/nsight-analysis.png new file mode 100644 index 0000000..119d931 Binary files /dev/null and b/images/nsight-analysis.png differ diff --git a/images/nsight-gui.png b/images/nsight-gui.png new file mode 100644 index 0000000..2c9171b Binary files /dev/null and b/images/nsight-gui.png differ diff --git a/images/nsight-summary.png b/images/nsight-summary.png new file mode 100644 index 0000000..f4ad3d6 Binary files /dev/null and b/images/nsight-summary.png differ diff --git a/images/webgl-report.png b/images/webgl-report.png new file mode 100644 index 0000000..695c24b Binary files /dev/null and b/images/webgl-report.png differ diff --git a/images/webgpu-report.png b/images/webgpu-report.png new file mode 100644 index 0000000..73b8da1 Binary files /dev/null and b/images/webgpu-report.png differ