diff --git a/README.md b/README.md index d63a6a1..dfcd4fa 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,32 @@ **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 1 - Flocking** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Yichao Wang + * [LinkedIn](https://www.linkedin.com/in/wangyic/) +* Tested on: Windows 10 Home 64-bit (10.0, Build 18363) + * Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz (8 CPUs) + * GeForce GTX 1060 6.1 -### (TODO: Your README) +**Boids Animation** -Include screenshots, analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +![](images/screenshot.PNG) + +![](images/demo.gif) + +**Performance Analysis** + +![Figure1](images/withoutV.png) + +![](images/withV.png) + +![](images/blockSize.png) + +From above plots, we can see: + +1. For each implementation, as the # of boids increases, the performance descreases. This is because we need to process more data as the # of boids increases. + +2. For each implementation, changing of block size does not affect performance. This is because changing of block size does not change the amount of threads we used to process data. + +3. The performance improves with the more coherent uniform grid. Because we reduce the random access by eliminating ```dev_particleArrayIndices``` and use contiguous memory by ordering boid's position and velocity in the coherent kernel. + +4. Surprisingly, checking 27 neighboring has better performance than checking 8 neighboring. This is because we have many if statements when checking 8 neighboring. The if statements make the branch in warp diverge which results in more run time. diff --git a/images/blockSize.png b/images/blockSize.png new file mode 100644 index 0000000..454bcc9 Binary files /dev/null and b/images/blockSize.png differ diff --git a/images/demo.gif b/images/demo.gif new file mode 100644 index 0000000..c6d69b6 Binary files /dev/null and b/images/demo.gif differ diff --git a/images/screenshot.PNG b/images/screenshot.PNG new file mode 100644 index 0000000..88a9876 Binary files /dev/null and b/images/screenshot.PNG differ diff --git a/images/withV.png b/images/withV.png new file mode 100644 index 0000000..b802fa7 Binary files /dev/null and b/images/withV.png differ diff --git a/images/withoutV.png b/images/withoutV.png new file mode 100644 index 0000000..55e11fc Binary files /dev/null and b/images/withoutV.png differ diff --git a/src/kernel.cu b/src/kernel.cu index 74dffcb..2c247da 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -37,7 +37,7 @@ void checkCUDAError(const char *msg, int line = -1) { *****************/ /*! Block size used for CUDA kernel launch. */ -#define blockSize 128 +#define blockSize 1024 // LOOK-1.2 Parameters for the boids algorithm. // These worked well in our reference implementation. @@ -85,6 +85,7 @@ int *dev_gridCellEndIndices; // to this cell? // TODO-2.3 - consider what additional buffers you might need to reshuffle // the position and velocity data to be coherent within cells. +glm::vec3* dev_sorted_pos; // LOOK-2.1 - Grid parameters based on simulation parameters. // These are automatically computed for you in Boids::initSimulation @@ -157,7 +158,7 @@ void Boids::initSimulation(int N) { checkCUDAErrorWithLine("kernGenerateRandomPosArray failed!"); // LOOK-2.1 computing grid params - gridCellWidth = 2.0f * std::max(std::max(rule1Distance, rule2Distance), rule3Distance); + gridCellWidth = 1.0f * std::max(std::max(rule1Distance, rule2Distance), rule3Distance); int halfSideCount = (int)(scene_scale / gridCellWidth) + 1; gridSideCount = 2 * halfSideCount; @@ -169,6 +170,24 @@ void Boids::initSimulation(int N) { gridMinimum.z -= halfGridWidth; // TODO-2.1 TODO-2.3 - Allocate additional buffers here. + cudaMalloc((void**)&dev_particleArrayIndices, N * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_particleArrayIndices failed!"); + + cudaMalloc((void**)&dev_particleGridIndices, N * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_particleGridIndices failed!"); + + cudaMalloc((void**)&dev_gridCellStartIndices, gridCellCount * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_gridCellStartIndices failed!"); + + cudaMalloc((void**)&dev_gridCellEndIndices, gridCellCount * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_gridCellEndIndices failed!"); + + cudaMalloc((void**)&dev_sorted_pos, N * sizeof(glm::vec3)); + checkCUDAErrorWithLine("cudaMalloc sorted_dev_pos failed!"); + + dev_thrust_particleArrayIndices = thrust::device_ptr(dev_particleArrayIndices); + dev_thrust_particleGridIndices = thrust::device_ptr(dev_particleGridIndices); + cudaDeviceSynchronize(); } @@ -233,7 +252,49 @@ __device__ glm::vec3 computeVelocityChange(int N, int iSelf, const glm::vec3 *po // Rule 1: boids fly towards their local perceived center of mass, which excludes themselves // Rule 2: boids try to stay a distance d away from each other // Rule 3: boids try to match the speed of surrounding boids - return glm::vec3(0.0f, 0.0f, 0.0f); + glm::vec3 finalVel = glm::vec3(0.f), perceivedCenter = finalVel, c = finalVel, perceivedVel = finalVel; + int rule1Neighbors = 0, rule3Neighbors = 0; + glm::vec3 boidPos = pos[iSelf]; + + for (int i = 0; i < N; i++) { + if (i != iSelf) { + glm::vec3 bPos = pos[i]; + float dist = glm::distance(bPos, boidPos); + + // rule1: cohesion + if (dist < rule1Distance) { + rule1Neighbors++; + perceivedCenter += bPos; + } + + // separation + if (dist < rule2Distance) { + c -= bPos - boidPos; + } + + // alignment + if (dist < rule3Distance) { + rule3Neighbors++; + perceivedVel += vel[i]; + } + } + } + + if (rule1Neighbors != 0) { + perceivedCenter /= rule1Neighbors; + finalVel += (perceivedCenter - boidPos) * rule1Scale; + } + + + finalVel += c * rule2Scale; + + // alignment + if (rule3Neighbors != 0) { + perceivedVel /= rule3Neighbors; + finalVel += perceivedVel * rule3Scale; + } + + return vel[iSelf] + finalVel; } /** @@ -245,6 +306,18 @@ __global__ void kernUpdateVelocityBruteForce(int N, glm::vec3 *pos, // Compute a new velocity based on pos and vel1 // Clamp the speed // Record the new velocity into vel2. Question: why NOT vel1? + int idx = threadIdx.x + (blockIdx.x * blockDim.x); + if (idx >= N) { + return; + } + + glm::vec3 vel = computeVelocityChange(N, idx, pos, vel1); + if (glm::length(vel) > maxSpeed) { + vel = glm::normalize(vel) * maxSpeed; + } + + vel2[idx] = vel; + } /** @@ -282,6 +355,10 @@ __device__ int gridIndex3Dto1D(int x, int y, int z, int gridResolution) { return x + y * gridResolution + z * gridResolution * gridResolution; } +__device__ int gridIndex3Dto1D(glm::vec3 gridIndex3D, int gridResolution) { + return gridIndex3D.x + gridIndex3D.y * gridResolution + gridIndex3D.z * gridResolution * gridResolution; +} + __global__ void kernComputeIndices(int N, int gridResolution, glm::vec3 gridMin, float inverseCellWidth, glm::vec3 *pos, int *indices, int *gridIndices) { @@ -289,6 +366,20 @@ __global__ void kernComputeIndices(int N, int gridResolution, // - Label each boid with the index of its grid cell. // - Set up a parallel array of integer indices as pointers to the actual // boid data in pos and vel1/vel2 + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= N) { + return; + } + + + + + glm::vec3 cellIdx3D = glm::floor((pos[index] - gridMin) * inverseCellWidth); + + gridIndices[index] = gridIndex3Dto1D(cellIdx3D, gridResolution); + indices[index] = index; + + } // LOOK-2.1 Consider how this could be useful for indicating that a cell @@ -306,6 +397,138 @@ __global__ void kernIdentifyCellStartEnd(int N, int *particleGridIndices, // Identify the start point of each cell in the gridIndices array. // This is basically a parallel unrolling of a loop that goes // "this index doesn't match the one before it, must be a new cell!" + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= N) { + return; + } + + int cellIndex = particleGridIndices[index]; + if (index == 0) { + gridCellStartIndices[cellIndex] = index; + return; + } + if (index == N - 1) { + gridCellEndIndices[cellIndex] = index; + return; + } + int cellIndexBefore = particleGridIndices[index - 1]; + if (cellIndex != cellIndexBefore) { + gridCellEndIndices[cellIndexBefore] = index - 1; + gridCellStartIndices[cellIndex] = index; + } + + +} + + +__device__ int* getNeighborCells8(int gridResolution, glm::vec3 gridMin, float cellWidth, + glm::vec3 cellIdx3D, glm::vec3 boidPos, int* gridCellStartIndices, int* gridCellEndIndices) { + const int size = 8; + int neighborCellIdxs[size]; + for (int i = 0; i < size; i++) { + neighborCellIdxs[i] = -1; + } + int tracker = -1; + + int xDir, yDir, zDir = 0; + float searchDist = imax(imax(rule1Distance, rule2Distance), rule3Distance); + glm::vec3 cornerPos = gridMin + cellIdx3D * cellWidth; + + if (boidPos.x - searchDist < cornerPos.x) { + if (cellIdx3D.x != 0) { + xDir = -1; + } + } else { + if (cellIdx3D.x != gridResolution - 1) { + xDir = 1; + } + } + + if (boidPos.y - searchDist < cornerPos.y) { + if (cellIdx3D.y != 0) { + yDir = -1; + } + } + else { + if (cellIdx3D.y != gridResolution - 1) { + yDir = 1; + } + } + + if (boidPos.z - searchDist < cornerPos.z) { + if (cellIdx3D.z != 0) { + zDir = -1; + } + } + else { + if (cellIdx3D.z != gridResolution - 1) { + zDir = 1; + } + } + + glm::vec3 neighborCellIdx3D = cellIdx3D; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + if (zDir != 0) { + neighborCellIdx3D.z += zDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + } + + if (xDir != 0) { + glm::vec3 neighborCellIdx3D = cellIdx3D; + neighborCellIdx3D.x += xDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + if (zDir != 0) { + neighborCellIdx3D.z += zDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + } + } + + if (yDir != 0) { + glm::vec3 neighborCellIdx3D = cellIdx3D; + neighborCellIdx3D.y += yDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + if (zDir != 0) { + neighborCellIdx3D.z += zDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + } + } + + if (xDir != 0 && yDir != 0) { + glm::vec3 neighborCellIdx3D = cellIdx3D; + neighborCellIdx3D.x += xDir; + neighborCellIdx3D.y += yDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + if (zDir != 0) { + neighborCellIdx3D.z += zDir; + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + } + } + + return neighborCellIdxs; +} + +__device__ int* getNeighborCells27(int gridResolution, glm::vec3 gridMin, float cellWidth, + glm::vec3 cellIdx3D, glm::vec3 boidPos, int* gridCellStartIndices, int* gridCellEndIndices) { + const int size = 27; + int neighborCellIdxs[size]; + for (int i = 0; i < size; i++) { + neighborCellIdxs[i] = -1; + } + int tracker = -1; + + for (int k = cellIdx3D.z - 1; k <= cellIdx3D.z + 1; k++) { + for (int j = cellIdx3D.y - 1; j <= cellIdx3D.y + 1; j++) { + for (int i = cellIdx3D.x - 1; i <= cellIdx3D.x + 1; i++) { + if (i < 0 || j < 0 || k < 0 || i >= gridResolution || j >= gridResolution || k >= gridResolution) { + continue; + } + glm::vec3 neighborCellIdx3D(i, j, k); + neighborCellIdxs[++tracker] = gridIndex3Dto1D(neighborCellIdx3D, gridResolution); + } + } + } + return neighborCellIdxs; + } __global__ void kernUpdateVelNeighborSearchScattered( @@ -322,6 +545,86 @@ __global__ void kernUpdateVelNeighborSearchScattered( // - Access each boid in the cell and compute velocity change from // the boids rules, if this boid is within the neighborhood distance. // - Clamp the speed change before putting the new speed in vel2 + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= N) { + return; + } + + glm::vec3 finalVel = glm::vec3(0.f), perceivedCenter = finalVel, c = finalVel, perceivedVel = finalVel; + int rule1Neighbors = 0, rule3Neighbors = 0; + + glm::vec3 cellIdx3D = glm::floor((pos[index] - gridMin) * inverseCellWidth); + int cellIndex = gridIndex3Dto1D(cellIdx3D, gridResolution); + + glm::vec3 boidPos = pos[index]; + int* neighborCellIdxs = getNeighborCells27(gridResolution, gridMin, cellWidth, cellIdx3D, boidPos, + gridCellStartIndices, gridCellEndIndices); + + for (int i = 0; i < 27; i++) { + int cellIdx = neighborCellIdxs[i]; + if (cellIdx == -1) { + continue; + } + if (gridCellStartIndices[cellIdx] == -1) { + continue; + } + for (int j = gridCellStartIndices[cellIdx]; j <= gridCellEndIndices[cellIdx]; j++) { + int boidIndex = particleArrayIndices[j]; + if (boidIndex != index) { + glm::vec3 bPos = pos[boidIndex]; + float dist = glm::distance(bPos, boidPos); + + // rule1: cohesion + if (dist < rule1Distance) { + rule1Neighbors++; + perceivedCenter += bPos; + } + + // separation + if (dist < rule2Distance) { + c -= bPos - boidPos; + } + + // alignment + if (dist < rule3Distance) { + rule3Neighbors++; + perceivedVel += vel1[boidIndex]; + } + } + } + } + + if (rule1Neighbors != 0) { + perceivedCenter /= rule1Neighbors; + finalVel += (perceivedCenter - boidPos) * rule1Scale; + } + + finalVel += c * rule2Scale; + + // alignment + if (rule3Neighbors != 0) { + perceivedVel /= rule3Neighbors; + finalVel += perceivedVel * rule3Scale; + } + + finalVel += vel1[index]; + + if (glm::length(finalVel) > maxSpeed) { + finalVel = glm::normalize(finalVel) * maxSpeed; + } + + vel2[index] = finalVel; +} + +__global__ void kernSortPosVel(int N, int* particleArrayIndices, glm::vec3* pos, + glm::vec3 *sortPos, glm::vec3 *vel1, glm::vec3 *vel2) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= N) { + return; + } + sortPos[index] = pos[particleArrayIndices[index]]; + vel2[index] = vel1[particleArrayIndices[index]]; } __global__ void kernUpdateVelNeighborSearchCoherent( @@ -341,6 +644,75 @@ __global__ void kernUpdateVelNeighborSearchCoherent( // - Access each boid in the cell and compute velocity change from // the boids rules, if this boid is within the neighborhood distance. // - Clamp the speed change before putting the new speed in vel2 + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= N) { + return; + } + + glm::vec3 finalVel = glm::vec3(0.f), perceivedCenter = finalVel, c = finalVel, perceivedVel = finalVel; + int rule1Neighbors = 0, rule3Neighbors = 0; + + glm::vec3 cellIdx3D = glm::floor((pos[index] - gridMin) * inverseCellWidth); + int cellIndex = gridIndex3Dto1D(cellIdx3D, gridResolution); + + glm::vec3 boidPos = pos[index]; + int* neighborCellIdxs = getNeighborCells27(gridResolution, gridMin, cellWidth, cellIdx3D, boidPos, + gridCellStartIndices, gridCellEndIndices); + + for (int i = 0; i < 27; i++) { + int cellIdx = neighborCellIdxs[i]; + if (cellIdx == -1) { + continue; + } + if (gridCellStartIndices[cellIdx] == -1) { + continue; + } + for (int j = gridCellStartIndices[cellIdx]; j <= gridCellEndIndices[cellIdx]; j++) { + //int boidIndex = particleArrayIndices[j]; + if (j != index) { + glm::vec3 bPos = pos[j]; + float dist = glm::distance(bPos, boidPos); + + // rule1: cohesion + if (dist < rule1Distance) { + rule1Neighbors++; + perceivedCenter += bPos; + } + + // separation + if (dist < rule2Distance) { + c -= bPos - boidPos; + } + + // alignment + if (dist < rule3Distance) { + rule3Neighbors++; + perceivedVel += vel1[j]; + } + } + } + } + + if (rule1Neighbors != 0) { + perceivedCenter /= rule1Neighbors; + finalVel += (perceivedCenter - boidPos) * rule1Scale; + } + + finalVel += c * rule2Scale; + + // alignment + if (rule3Neighbors != 0) { + perceivedVel /= rule3Neighbors; + finalVel += perceivedVel * rule3Scale; + } + + finalVel += vel1[index]; + + if (glm::length(finalVel) > maxSpeed) { + finalVel = glm::normalize(finalVel) * maxSpeed; + } + + vel2[index] = finalVel; } /** @@ -349,6 +721,15 @@ __global__ void kernUpdateVelNeighborSearchCoherent( void Boids::stepSimulationNaive(float dt) { // TODO-1.2 - use the kernels you wrote to step the simulation forward in time. // TODO-1.2 ping-pong the velocity buffers + dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize); + kernUpdateVelocityBruteForce <<< fullBlocksPerGrid, blockSize >>> (numObjects, + dev_pos, dev_vel1, dev_vel2); + + kernUpdatePos << > > (numObjects, dt, dev_pos, dev_vel2); + + // ping-pong + cudaMemcpy(dev_vel1, dev_vel2, sizeof(glm::vec3) * numObjects, cudaMemcpyDeviceToDevice); + } void Boids::stepSimulationScatteredGrid(float dt) { @@ -364,6 +745,38 @@ void Boids::stepSimulationScatteredGrid(float dt) { // - Perform velocity updates using neighbor search // - Update positions // - Ping-pong buffers as needed + + dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize); + dim3 fullBlocksPerGridCells((gridCellCount + blockSize - 1) / blockSize); + + kernComputeIndices << > > (numObjects, gridSideCount, gridMinimum, + gridInverseCellWidth, dev_pos, dev_particleArrayIndices, dev_particleGridIndices); + checkCUDAErrorWithLine("kernComputeIndices failed!"); + + thrust::sort_by_key(dev_thrust_particleGridIndices, dev_thrust_particleGridIndices + numObjects, + dev_thrust_particleArrayIndices); + checkCUDAErrorWithLine("thrust sort failed!"); + + kernResetIntBuffer << > > (gridCellCount, + dev_gridCellStartIndices, -1); + checkCUDAErrorWithLine("kernResetIntBuffer failed!"); + + kernIdentifyCellStartEnd << > > (numObjects, dev_particleGridIndices, + dev_gridCellStartIndices, dev_gridCellEndIndices); + checkCUDAErrorWithLine("kernIdentifyCellStartEnd failed!"); + + kernUpdateVelNeighborSearchScattered << > > (numObjects, gridSideCount, + gridMinimum, gridInverseCellWidth, gridCellWidth, dev_gridCellStartIndices, dev_gridCellEndIndices, + dev_particleArrayIndices, dev_pos, dev_vel1, dev_vel2); + checkCUDAErrorWithLine("kernUpdateVelNeighborSearchScattered failed!"); + + kernUpdatePos << > > (numObjects, dt, dev_pos, dev_vel2); + checkCUDAErrorWithLine("kernUpdatePos failed!"); + + // ping-pong + cudaMemcpy(dev_vel1, dev_vel2, sizeof(glm::vec3) * numObjects, cudaMemcpyDeviceToDevice); + + } void Boids::stepSimulationCoherentGrid(float dt) { @@ -382,6 +795,40 @@ void Boids::stepSimulationCoherentGrid(float dt) { // - Perform velocity updates using neighbor search // - Update positions // - Ping-pong buffers as needed. THIS MAY BE DIFFERENT FROM BEFORE. + dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize); + dim3 fullBlocksPerGridCells((gridCellCount + blockSize - 1) / blockSize); + + kernComputeIndices << > > (numObjects, gridSideCount, gridMinimum, + gridInverseCellWidth, dev_pos, dev_particleArrayIndices, dev_particleGridIndices); + checkCUDAErrorWithLine("kernComputeIndices failed!"); + + thrust::sort_by_key(dev_thrust_particleGridIndices, dev_thrust_particleGridIndices + numObjects, + dev_thrust_particleArrayIndices); + checkCUDAErrorWithLine("thrust sort failed!"); + + kernResetIntBuffer << > > (gridCellCount, + dev_gridCellStartIndices, -1); + checkCUDAErrorWithLine("kernResetIntBuffer failed!"); + + kernIdentifyCellStartEnd << > > (numObjects, dev_particleGridIndices, + dev_gridCellStartIndices, dev_gridCellEndIndices); + checkCUDAErrorWithLine("kernIdentifyCellStartEnd failed!"); + + kernSortPosVel << > > (numObjects, dev_particleArrayIndices, dev_pos, + dev_sorted_pos, dev_vel1, dev_vel2); + + kernUpdateVelNeighborSearchCoherent << > > (numObjects, gridSideCount, + gridMinimum, gridInverseCellWidth, gridCellWidth, dev_gridCellStartIndices, dev_gridCellEndIndices, + dev_sorted_pos, dev_vel2, dev_vel1); + checkCUDAErrorWithLine("kernUpdateVelNeighborSearchScattered failed!"); + + kernUpdatePos << > > (numObjects, dt, dev_sorted_pos, dev_vel1); + checkCUDAErrorWithLine("kernUpdatePos failed!"); + + // ping-pong + cudaMemcpy(dev_vel2, dev_vel1, sizeof(glm::vec3) * numObjects, cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_pos, dev_sorted_pos, sizeof(glm::vec3) * numObjects, cudaMemcpyDeviceToDevice); + } void Boids::endSimulation() { @@ -390,6 +837,12 @@ void Boids::endSimulation() { cudaFree(dev_pos); // TODO-2.1 TODO-2.3 - Free any additional buffers here. + cudaFree(dev_particleArrayIndices); + cudaFree(dev_particleGridIndices); + cudaFree(dev_gridCellStartIndices); + cudaFree(dev_gridCellEndIndices); + cudaFree(dev_sorted_pos); + } void Boids::unitTest() { diff --git a/src/main.cpp b/src/main.cpp index b82c8c6..abd4a60 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,12 +13,12 @@ // ================ // LOOK-2.1 LOOK-2.3 - toggles for UNIFORM_GRID and COHERENT_GRID -#define VISUALIZE 1 -#define UNIFORM_GRID 0 -#define COHERENT_GRID 0 +#define VISUALIZE 0 +#define UNIFORM_GRID 1 +#define COHERENT_GRID 1 // LOOK-1.2 - change this to adjust particle count in the simulation -const int N_FOR_VIS = 5000; +const int N_FOR_VIS = 50000; const float DT = 0.2f; /**