diff --git a/.gitignore b/.gitignore index f9f0d8a0..dae7db94 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,8 @@ build builds .vscode + +# local build and tooling artifacts +build.log +Dockerfile +.claude/ diff --git a/CMakeLists.txt b/CMakeLists.txt index be1aa5e5..1d3fb8c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,13 +194,14 @@ target_sources( src/MatrixWrapper/SparsityPattern.cpp src/MatrixWrapper/Distributed.cpp src/MatrixWrapper/Combination.cpp + src/Preconditioner/CoarseSolver.cpp src/Repartitioner.cpp src/Solver/CG.cpp src/Solver/PipeCG.cpp src/Solver/BiCGStab.cpp src/Solver/GMRES.cpp src/Solver/Multigrid.cpp - # src/Solver/IR.cpp + # src/Solver/IR.cpp ) enable_sanitizers( @@ -218,7 +219,13 @@ endif() if(${GINKGO_BUILD_CUDA}) find_package(CUDAToolkit REQUIRED) target_compile_definitions(OGL PRIVATE GINKGO_BUILD_CUDA=1) - target_link_libraries(OGL PUBLIC CUDA::nvToolsExt) + if(TARGET CUDA::nvToolsExt) + target_link_libraries(OGL PUBLIC CUDA::nvToolsExt) + elseif(TARGET CUDA::nvtx3) + target_link_libraries(OGL PUBLIC CUDA::nvtx3) + else() + message(WARNING "Neither CUDA::nvToolsExt nor CUDA::nvtx3 found; NVTX annotations disabled") + endif() endif() if(OGL_DATA_VALIDATION) @@ -243,15 +250,20 @@ add_custom_target( WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMENT "a target to format cmake files") -install( - TARGETS OGL - ginkgo - ginkgo_device - ginkgo_hip - ginkgo_cuda - ginkgo_omp - ginkgo_dpcpp - ginkgo_reference - DESTINATION $ENV{FOAM_USER_LIBBIN}) +set(_ogl_install_targets OGL) +foreach( + _target + ginkgo + ginkgo_device + ginkgo_hip + ginkgo_cuda + ginkgo_omp + ginkgo_dpcpp + ginkgo_reference) + if(TARGET ${_target}) + list(APPEND _ogl_install_targets ${_target}) + endif() +endforeach() +install(TARGETS ${_ogl_install_targets} DESTINATION $ENV{FOAM_USER_LIBBIN}) include(cmake/banner.cmake) diff --git a/cmake/AutoEnableDevice.cmake b/cmake/AutoEnableDevice.cmake index 91602e66..7ad80380 100644 --- a/cmake/AutoEnableDevice.cmake +++ b/cmake/AutoEnableDevice.cmake @@ -33,5 +33,5 @@ if(NOT DEFINED OGL_BUILD_HIP) CACHE INTERNAL "") endif() else() - message(STATUS "Skip HIP detection OGL_HIP_CUDA=${OGL_BUILD_HIP}") + message(STATUS "Skip HIP detection OGL_BUILD_HIP=${OGL_BUILD_HIP}") endif() diff --git a/cmake/CxxThirdParty.cmake b/cmake/CxxThirdParty.cmake index ba710fbf..8efe4758 100644 --- a/cmake/CxxThirdParty.cmake +++ b/cmake/CxxThirdParty.cmake @@ -6,10 +6,13 @@ include(cmake/CPM.cmake) + if(NOT DEFINED OGL_GINKGO_DIR) - set(OGL_GINKGO_CHECKOUT_VERSION - "ogl_0600_gko190" - CACHE STRING "Use specific version of ginkgo") + if(NOT DEFINED OGL_GINKGO_CHECKOUT_VERSION) + set(OGL_GINKGO_CHECKOUT_VERSION + "ogl_0600_gko190" + CACHE STRING "Use specific version of ginkgo") + endif() message(STATUS "Using CPM to get Ginkgo ${GINKGO_CHECKOUT_VERSION}") set(OGL_GINKGO_VIA_CPM ON) else() diff --git a/include/OGL/DevicePersistent/ExecutorHandler.hpp b/include/OGL/DevicePersistent/ExecutorHandler.hpp index 3c9b7da5..df56b32f 100644 --- a/include/OGL/DevicePersistent/ExecutorHandler.hpp +++ b/include/OGL/DevicePersistent/ExecutorHandler.hpp @@ -71,6 +71,14 @@ struct DeviceIdHandler { return device_global_id % num_devices_per_node; } + /* @brief returns the owner rank on the global comm world communicator + */ + label global_owner() const + { + label rank = Pstream::myProcNo(); + return rank - (rank % ranks_per_gpu); + } + /* @brief check if rank is an owning rank */ bool is_owner() const @@ -270,7 +278,7 @@ class ExecutorHandler // gko comm label group = device_id_handler_.compute_group(); MPI_Comm gko_comm; - label host_rank = 0; + label host_rank = Pstream::myProcNo(); MPI_Comm_split(MPI_COMM_WORLD, group, host_rank, &gko_comm); device_comm_ = std::make_shared( @@ -278,7 +286,8 @@ class ExecutorHandler // repart comm MPI_Comm repart_comm; - label device_id = device_id_handler_.compute_device_id(4); + label global_rank = Pstream::myProcNo(); + label device_id = global_rank / device_id_handler_.ranks_per_gpu; MPI_Comm_split(MPI_COMM_WORLD, device_id, host_rank, &repart_comm); repart_comm_ = std::make_shared( @@ -300,6 +309,15 @@ class ExecutorHandler * */ bool get_non_orig_device_comm() const { return non_orig_device_comm_; } + label get_ranks_per_gpu() const { return device_id_handler_.ranks_per_gpu; } + + void set_ranks_per_gpu(label ranks_per_gpu) + { + device_id_handler_.ranks_per_gpu = ranks_per_gpu; + } + + label get_owner_rank() const { return device_id_handler_.global_owner(); } + const std::shared_ptr get_device_exec() const { return this->get_persistent_object(); diff --git a/include/OGL/Preconditioner.hpp b/include/OGL/Preconditioner.hpp index 03533486..7e924665 100644 --- a/include/OGL/Preconditioner.hpp +++ b/include/OGL/Preconditioner.hpp @@ -8,11 +8,20 @@ #include "OGL/DevicePersistent/Base.hpp" #include "OGL/MatrixWrapper/Distributed.hpp" +#include "OGL/Preconditioner/Adaptive.hpp" +#include "OGL/Preconditioner/Cholesky.hpp" +#include "OGL/Preconditioner/ISAI.hpp" +#include "OGL/Preconditioner/Jacobi.hpp" +#include "OGL/Preconditioner/LU.hpp" +#include "OGL/Preconditioner/Multigrid.hpp" +#include "OGL/Preconditioner/Schwarz.hpp" #include "fvCFD.H" #include "regIOobject.H" namespace Foam { + + class Preconditioner { using mtx = gko::matrix::Csr; using bj = gko::preconditioner::Jacobi; @@ -52,110 +61,6 @@ class Preconditioner { verbose_(verbose) {} - template - std::shared_ptr wrap_multi_level_schwarz( - std::shared_ptr gkomatrix, - std::shared_ptr device_exec, - std::shared_ptr precond, const dictionary &d, - label local_rows) const - { - using pgm = gko::multigrid::Pgm; - using fc = gko::multigrid::FixedCoarsening; - using solver = gko::solver::Cg; - - auto selCoarseRows = d.lookupOrDefault("selCoarseRows", label(5)); - auto fixedCoarsening = - d.lookupOrDefault("fixedCoarsening", false); - auto coarseWeight = d.lookupOrDefault("coarseWeight", scalar(0.01)); - auto solveNormC = - d.lookupOrDefault("reductionCoarseSolver", label(1e-6)); - auto maxIterCoarse = d.lookupOrDefault("maxIterCoarse", label(50)); - - word msg = "Generate multi level schwarz:\n\tfixedCoarsening " + - std::to_string(fixedCoarsening) + "\n\tselCoarseRows " + - std::to_string(selCoarseRows) + "\n\trelTolCoarse " + - std::to_string(solveNormC) + "\n\tmaxIterCoarse " + - std::to_string(maxIterCoarse) + "\n\tcoarseWeigth" + - std::to_string(coarseWeight); - MLOG_0(verbose_, msg) - - auto pre_factory = ras::build().with_local_solver( - bj::build().with_skip_sorting(true).with_max_block_size(1u).on( - device_exec)); - - auto coarse_solver = gko::share( - solver::build() - .with_preconditioner(pre_factory) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(maxIterCoarse), - gko::stop::ResidualNorm::build() - .with_reduction_factor(solveNormC)) - .on(device_exec)); - - if (fixedCoarsening) { - auto n_rows = local_rows / selCoarseRows; - auto sel_rows = - gko::array