Use curl again #218
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standalone Benchmark workflow.
#
# For every GPU in the matrix this job:
#   1. downloads a reference output file and two event archives,
#   2. builds GPU/GPUTracking/Standalone with GPUCA_DETERMINISTIC_MODE=GPU and
#      compares the produced GPU.out byte-for-byte (cmp) against the reference,
#   3. rebuilds with GPUCA_DETERMINISTIC_MODE=OFF and runs the timing benchmark,
#   4. runs one vendor profiler (ncu, nsys or rocprofv2, selected by matrix name),
#   5. uploads the resulting CSV files and appends the csv_to_md.py output
#      (baseline vs. current) to the job summary.
name: Standalone Benchmark

on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - '**'

jobs:
  benchmark:
    runs-on: ${{ matrix.runner }}
    container: registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
    strategy:
      fail-fast: false
      matrix:
        name: [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
        include:
          - name: nvidia-h100
            runner: cern-nextgen-h100
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
          - name: nvidia-l40s
            runner: cern-nextgen-l40s
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
          - name: amd-mi300x
            runner: cern-nextgen-mi300x
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
          - name: amd-w7900
            runner: cern-nextgen-w7900
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
    env:
      WORK_DIR: /cvmfs/alice.cern.ch
      ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
      MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
      # Install prefix of the standalone build; also holds downloaded inputs.
      STANDALONE_DIR: /root/standalone
      # Output of the benchmark step and of the profiler step, respectively.
      BENCHMARK_CSV: ${{ matrix.name }}.csv
      PROFILER_CSV: results_${{ matrix.name }}.csv
      # Common command line shared by the benchmark and all profiler steps.
      # Expanded unquoted in the scripts so that it splits into arguments.
      # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
      TIMING_CA: ./ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --debug 1
      LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
      # Single source of truth for the environment modules loaded by every
      # step (folded scalar: the lines are joined with single spaces).
      STANDALONE_MODULES: >-
        ninja/fortran-v1.11.1.g9-15
        Vc/1.4.5-10
        boost/v1.83.0-alice2-57
        fmt/11.1.2-14
        CMake/v3.31.6-10
        ms_gsl/4.2.1-3
        Clang/v20.1.7-9
        TBB/v2022.3.0-3
        ROOT/v6-36-04-alice9-15
        ONNXRuntime/v1.22.0-71
        GLFW/3.3.2-25
      # Public CERNBox share holding reference output, events and baselines.
      CERNBOX_URL: https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75
    name: ${{ matrix.name }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      - name: Download Files
        run: |
          mkdir -p "${STANDALONE_DIR}/events"
          # Reference output used by the cmp check in the test step.
          curl -fL --retry 3 -o "${STANDALONE_DIR}/o2-simple-GPU.out" "${CERNBOX_URL}/o2-simple-GPU.out"
          # Event archives are unpacked next to where they are downloaded.
          curl -fL --retry 3 -o "${STANDALONE_DIR}/events/o2-simple.tar.xz" "${CERNBOX_URL}/events/o2-simple.tar.xz"
          tar -xf "${STANDALONE_DIR}/events/o2-simple.tar.xz" -C "${STANDALONE_DIR}/events"
          curl -fL --retry 3 -o "${STANDALONE_DIR}/events/50kHz.tar.xz" "${CERNBOX_URL}/events/50kHz.tar.xz"
          tar -xf "${STANDALONE_DIR}/events/50kHz.tar.xz" -C "${STANDALONE_DIR}/events"

      # The script is anchored (&build) so that "Build Non-Deterministic" can
      # reuse it verbatim via *build; only DETERMINISTIC_MODE differs.
      - name: Build Deterministic
        run: &build |
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          mkdir -p "${STANDALONE_DIR}"
          cmake -B "${STANDALONE_DIR}/build" ${{ matrix.cmake_args }} \
            -DENABLE_OPENCL=0 \
            -DGPUCA_BUILD_EVENT_DISPLAY=0 \
            -DGPUCA_DETERMINISTIC_MODE="${DETERMINISTIC_MODE}" \
            -DCMAKE_INSTALL_PREFIX="${STANDALONE_DIR}" \
            "${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/"
          cmake --build "${STANDALONE_DIR}/build" --target install -j 8
        env:
          DETERMINISTIC_MODE: GPU

      - name: Test GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          cd "${STANDALONE_DIR}"
          "${STANDALONE_DIR}/ca" -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
          # Any byte difference from the reference output fails the step.
          cmp "${STANDALONE_DIR}/GPU.out" "${STANDALONE_DIR}/o2-simple-GPU.out"
          # Drop test inputs/outputs and the deterministic build tree before
          # the non-deterministic rebuild. ":?" aborts if the prefix is empty.
          rm -rf "${STANDALONE_DIR:?}/GPU.out" \
            "${STANDALONE_DIR:?}/o2-simple-GPU.out" \
            "${STANDALONE_DIR:?}/events/o2-simple" \
            "${STANDALONE_DIR:?}/build"

      - name: Build Non-Deterministic
        run: *build
        env:
          DETERMINISTIC_MODE: OFF

      - name: Benchmark GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          cd "${STANDALONE_DIR}"
          # TIMING_CA is unquoted on purpose so it splits into "./ca" + args.
          ${TIMING_CA} --debug 1 --runs 42 --runsInit 2 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV "/root/${BENCHMARK_CSV}"
          # merge_runs.py rewrites the CSV in place (same input and output).
          python3 "${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py" --discard 2 --input "/root/${BENCHMARK_CSV}" --output "/root/${BENCHMARK_CSV}"

      - name: Profiler - Nsight Compute
        if: ${{ matrix.name == 'nvidia-h100' }}
        run: |
          dnf install -y cuda-nsight-compute-13-1
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          cd "${STANDALONE_DIR}"
          # Generates ${{ matrix.name }}.ncu-rep
          ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1
          ncu --import "${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep" --print-units base --csv > "/root/${PROFILER_CSV}"
          rm -rf "${STANDALONE_DIR:?}/events/50kHz" "${STANDALONE_DIR:?}/build"
          python3 "${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py" --runs 42 --input "/root/${PROFILER_CSV}" --output "/root/${PROFILER_CSV}"

      - name: Profiler - Nsight Systems
        if: ${{ matrix.name == 'nvidia-l40s' }}
        run: |
          # The CLI package is downloaded, installed and removed again.
          curl -fL --retry 3 -o "${STANDALONE_DIR}/nsys.rpm" https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
          dnf install -y "${STANDALONE_DIR}/nsys.rpm"
          rm -f "${STANDALONE_DIR}/nsys.rpm"
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          cd "${STANDALONE_DIR}"
          # Generates ${{ matrix.name }}.nsys-rep
          nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1
          nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > "/root/${PROFILER_CSV}"
          rm -rf "${STANDALONE_DIR:?}/events/50kHz" "${STANDALONE_DIR:?}/build"
          python3 "${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py" --runs 42 --input "/root/${PROFILER_CSV}" --output "/root/${PROFILER_CSV}"

      - name: Profiler - rocprofv2
        if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
        run: |
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          cd "${STANDALONE_DIR}"
          # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
          rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1
          rm -rf "${STANDALONE_DIR:?}/events/50kHz" "${STANDALONE_DIR:?}/build"
          python3 "${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py" --runs 42 --input "/root/${PROFILER_CSV}" --output "/root/${PROFILER_CSV}"

      - name: Upload Artifact
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.name }}-artifact
          path: "/root/*.csv"
          # A run that produced no CSV must not silently upload nothing.
          if-no-files-found: error

      - name: Display table on GitHub web
        run: |
          source /etc/profile.d/modules.sh
          # Unquoted on purpose: one word per module.
          module load ${STANDALONE_MODULES}
          mkdir -p "${STANDALONE_DIR}/baseline"
          curl -fL --retry 3 -o "${STANDALONE_DIR}/baseline/${PROFILER_CSV}" "${CERNBOX_URL}/baseline/${PROFILER_CSV}"
          curl -fL --retry 3 -o "${STANDALONE_DIR}/baseline/${BENCHMARK_CSV}" "${CERNBOX_URL}/baseline/${BENCHMARK_CSV}"
          # Profiler table first, then the benchmark table, separated by
          # blank lines; everything is appended to the job summary.
          {
            python3 "${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py" --baseline "${STANDALONE_DIR}/baseline/${PROFILER_CSV}" --current "/root/${PROFILER_CSV}"
            printf '\n\n\n'
            python3 "${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py" --baseline "${STANDALONE_DIR}/baseline/${BENCHMARK_CSV}" --current "/root/${BENCHMARK_CSV}"
          } >> "${GITHUB_STEP_SUMMARY}"
          rm -rf "${STANDALONE_DIR:?}/baseline"