From db12f3ee5bd622f3c6779ab22e704526e5a04afe Mon Sep 17 00:00:00 2001 From: Sara Faghih-Naini Date: Fri, 3 May 2024 11:50:44 +0200 Subject: [PATCH 01/22] Use account c28 and main-branch of PMAP-L --- daint/.bashrc | 2 +- daint/generate_prepare_mpi.py | 0 daint/generate_prepare_pmapl.py | 2 +- daint/salloc.py | 2 +- daint/sbatch.py | 2 +- daint/sbatch_pmapl.py | 4 ++-- 6 files changed, 6 insertions(+), 6 deletions(-) mode change 100644 => 100755 daint/generate_prepare_mpi.py mode change 100644 => 100755 daint/generate_prepare_pmapl.py mode change 100644 => 100755 daint/salloc.py mode change 100644 => 100755 daint/sbatch.py mode change 100644 => 100755 daint/sbatch_pmapl.py diff --git a/daint/.bashrc b/daint/.bashrc index 77f7507..f28314b 100644 --- a/daint/.bashrc +++ b/daint/.bashrc @@ -121,7 +121,7 @@ export LC_ALL=C.UTF-8 export LANG=C.UTF-8 # project -export PROJECT=/project/s299/"$USER" +export PROJECT=/project/c28/"$USER" # scratch case $HOSTNAME in diff --git a/daint/generate_prepare_mpi.py b/daint/generate_prepare_mpi.py old mode 100644 new mode 100755 diff --git a/daint/generate_prepare_pmapl.py b/daint/generate_prepare_pmapl.py old mode 100644 new mode 100755 index 637e21e..146b10c --- a/daint/generate_prepare_pmapl.py +++ b/daint/generate_prepare_pmapl.py @@ -10,7 +10,7 @@ # >>> config: start -BRANCH: str = "cloudsc-cy49r1" +BRANCH: str = "main" ENV: defs.ProgrammingEnvironment = "gnu" PARTITION: defs.Partition = "gpu" # >>> config: end diff --git a/daint/salloc.py b/daint/salloc.py old mode 100644 new mode 100755 index e15b45d..b703158 --- a/daint/salloc.py +++ b/daint/salloc.py @@ -7,7 +7,7 @@ # >>> config: start -ACCOUNT: str = "s299" +ACCOUNT: str = "c28" NUM_NODES: int = 1 PARTITION: defs.Partition = "gpu" TIME: str = "01:00:00" diff --git a/daint/sbatch.py b/daint/sbatch.py old mode 100644 new mode 100755 index 08205a1..6d3822a --- a/daint/sbatch.py +++ b/daint/sbatch.py @@ -10,7 +10,7 @@ # >>> config: start -ACCOUNT: str = "s299" +ACCOUNT: str = "c28" JOB_NAME: str = "test_job" JOB_SCRIPT: str = "test_job" NUM_NODES: int = 1 diff --git a/daint/sbatch_pmapl.py b/daint/sbatch_pmapl.py old mode 100644 new mode 100755 index 33bbb7c..a1522a3 --- a/daint/sbatch_pmapl.py +++ b/daint/sbatch_pmapl.py @@ -10,8 +10,8 @@ # >>> config: start -account: str = "s299" -branch_l: list[str] = ["cloudsc-cy49r1"] +account: str = "c28" +branch_l: list[str] = ["main"] partition: defs.Partition = "gpu" env_l: list[defs.ProgrammingEnvironment] = ["gnu"] ghex_aggregate_fields: bool = False From 5128097f88fac91c0085576b7e88a52d7b3b9863 Mon Sep 17 00:00:00 2001 From: Sara Faghih-Naini Date: Fri, 3 May 2024 17:11:28 +0200 Subject: [PATCH 02/22] Update and add env variables and add generate-scripts for hpc2020 --- daint/.bashrc | 2 + daint/generate_build_autoconf.py | 1 + daint/generate_build_help2man.py | 45 ++++++++++ daint/generate_prepare_pmapl.py | 2 +- daint/utils.py | 2 + hpc2020/.bashrc | 3 + hpc2020/defs.py | 8 ++ hpc2020/generate_build_autoconf.py | 47 +++++++++++ hpc2020/generate_build_hdf5.py | 63 ++++++++++++++ hpc2020/generate_build_help2man.py | 43 ++++++++++ hpc2020/generate_build_netcdf.py | 74 ++++++++++++++++ hpc2020/generate_prepare_mpi.py | 37 ++++++++ hpc2020/generate_prepare_pmapl.py | 75 +++++++++++++++++ hpc2020/utils.py | 131 +++++++++++++++++++++++++++++ 14 files changed, 532 insertions(+), 1 deletion(-) create mode 100755 daint/generate_build_help2man.py create mode 100644 hpc2020/defs.py create mode 100644 hpc2020/generate_build_autoconf.py create mode 100755 hpc2020/generate_build_hdf5.py create mode 100644 hpc2020/generate_build_help2man.py create mode 100755 hpc2020/generate_build_netcdf.py create mode 100755 hpc2020/generate_prepare_mpi.py create mode 100755 hpc2020/generate_prepare_pmapl.py create mode 100644 hpc2020/utils.py diff --git a/daint/.bashrc b/daint/.bashrc index f28314b..bcfad29 100644 --- a/daint/.bashrc +++ b/daint/.bashrc @@ -139,6 +139,8 @@ export SQUEUE_FORMAT="%.9i %.50j %.15u %.5q %.15T %.10M %.10l %.5D %.13f %R" # hdf5 and netcdf export HDF5_ROOT=/users/"$USER"/hdf5/1.14.2/build/gnu export NETCDF_ROOT=/users/"$USER"/netcdf-c/4.9.2/build/gnu +PATH=~/help2man/1.49.3/build/gnu/bin/:$PATH +PATH=~/autoconf/2.72/build/gnu/bin/:$PATH # get node id of a salloc function get_node_id() { diff --git a/daint/generate_build_autoconf.py b/daint/generate_build_autoconf.py index f5d7584..0384c16 100755 --- a/daint/generate_build_autoconf.py +++ b/daint/generate_build_autoconf.py @@ -35,6 +35,7 @@ def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") utils.run("make -j 8 install") utils.export_variable("AUTOCONF_ROOT", build_dir) + utils.append_to_path("PATH", f"{build_dir}/bin") if __name__ == "__main__": diff --git a/daint/generate_build_help2man.py b/daint/generate_build_help2man.py new file mode 100755 index 0000000..86f34e7 --- /dev/null +++ b/daint/generate_build_help2man.py @@ -0,0 +1,45 @@ +#!/opt/python/3.9.4.1/bin/python +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import utils + + +# >>> config: start +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +ROOT_DIR: str = f"/users/{os.getlogin()}" +VERSION: str = "1.49.3" +# >>> config: endi + + +def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str): + with utils.batch_file(prefix="build_help2man"): + utils.module_purge(force=True) + utils.load_partition(partition) + utils.load_env(env) + root_dir = os.path.abspath(root_dir) + with utils.chdir(root_dir): + utils.run("mkdir -p help2man") + branch = f"master" + utils.run( + f"git clone --branch={branch} " + f"https://github.com/Distrotech/help2man.git help2man/{version}" + ) + with utils.chdir(f"help2man/{version}"): + build_dir = os.path.join(root_dir, f"help2man/{version}/build/{env}") + utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") + utils.run("make -j 8 install") + utils.append_to_path("PATH", f"{build_dir}/bin") + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) + parser.add_argument("--version", type=str, default=VERSION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/daint/generate_prepare_pmapl.py b/daint/generate_prepare_pmapl.py index 146b10c..5ec1728 100755 --- a/daint/generate_prepare_pmapl.py +++ b/daint/generate_prepare_pmapl.py @@ -52,7 +52,7 @@ def core(branch: str, env: defs.ProgrammingEnvironment, partition: defs.Partitio utils.setup_cuda() # path to custom build of HDF5 and NetCDF-C - home_dir = os.environ.get("HOME", "/users/subbiali") + home_dir = os.environ.get("HOME", f"/users/{os.getlogin()}") utils.export_variable("HDF5_ROOT", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) utils.export_variable("HDF5_DIR", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) utils.export_variable("NETCDF_ROOT", os.path.join(home_dir, f"netcdf-c/4.9.2/build/{env}")) diff --git a/daint/utils.py b/daint/utils.py index a3fd50b..bd17863 100644 --- a/daint/utils.py +++ b/daint/utils.py @@ -103,6 +103,8 @@ def load_env(env: str) -> None: def export_variable(name: str, value: typing.Any) -> None: run(f"export {name}={str(value)}") +def append_to_path( name: str, value: typing.Any) -> None: + run(f"{name}={str(value)}:${name}") def setup_cuda(): run("NVCC_PATH=$(which nvcc)") diff --git a/hpc2020/.bashrc b/hpc2020/.bashrc index d554cd1..ab4a921 100644 --- a/hpc2020/.bashrc +++ b/hpc2020/.bashrc @@ -101,3 +101,6 @@ alias sc='scancel' alias sq='squeue -u $USER' alias sr='srun' export SQUEUE_FORMAT="%.9i %.50j %.15u %.15q %.15T %.10M %.10l %.5D %.13f %R" + +PATH=~/help2man/1.49.3/build/gnu/bin/:$PATH +PATH=~/autoconf/2.72/build/gnu/bin/:$PATH \ No newline at end of file diff --git a/hpc2020/defs.py b/hpc2020/defs.py new file mode 100644 index 0000000..74ff9df --- /dev/null +++ b/hpc2020/defs.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +import typing + +FloatingPointPrecision = typing.Literal["double", "single"] +Partition = typing.Literal["gpu"] +ProgrammingEnvironment = typing.Literal["gnu"] + +valid_programming_environments = typing.get_args(ProgrammingEnvironment) \ No newline at end of file diff --git a/hpc2020/generate_build_autoconf.py b/hpc2020/generate_build_autoconf.py new file mode 100644 index 0000000..554856c --- /dev/null +++ b/hpc2020/generate_build_autoconf.py @@ -0,0 +1,47 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import utils + + +# >>> config: start +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +ROOT_DIR: str = f"/home/{os.getlogin()}" +VERSION: str = "2.72" +# >>> config: end + + +def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str): + with utils.batch_file(prefix="build_autoconf"): + utils.module_purge(force=True) + utils.load_env(env) + root_dir = os.path.abspath(root_dir) + with utils.chdir(root_dir): + utils.run("mkdir -p autoconf") + branch = f"v{version}" + utils.run( + f"git clone --branch={branch} " + f"http://git.sv.gnu.org/r/autoconf.git autoconf/{version}" + ) + with utils.chdir(f"autoconf/{version}"): + utils.run("./bootstrap") + build_dir = os.path.join(root_dir, f"autoconf/{version}/build/{env}") + utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") + utils.run("make -j 8 install") + utils.export_variable("AUTOCONF_ROOT", build_dir) + utils.append_to_path("PATH", f"{build_dir}/bin") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) + parser.add_argument("--version", type=str, default=VERSION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/hpc2020/generate_build_hdf5.py b/hpc2020/generate_build_hdf5.py new file mode 100755 index 0000000..fa1f541 --- /dev/null +++ b/hpc2020/generate_build_hdf5.py @@ -0,0 +1,63 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import utils + + +# >>> config: start +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +ROOT_DIR: str = f"/home/{os.getlogin()}" +VERSION: str = "1.14.2" +# >>> config: end + + +def core( + env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str +): + with utils.batch_file(prefix="build_hdf5"): + utils.module_purge(force=True) + utils.load_env(env) + utils.module_load("gcc/11.2.0") + utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") + utils.module_load("openmpi") + root_dir = os.path.abspath(root_dir) + with utils.chdir(root_dir): + utils.run("mkdir -p hdf5") + branch = f"hdf5-{version.replace('.', '_')}" + utils.run( + f"git clone --branch={branch} --depth=1 " + f"https://github.com/HDFGroup/hdf5.git hdf5/{version}" + ) + with utils.chdir(f"hdf5/{version}"): + utils.run("chmod +x autogen.sh") + utils.run("./autogen.sh") + build_dir = os.path.join(root_dir, f"hdf5/{version}/build/{env}") + utils.run( + "CC=mpicc", + "CXX=mpicxx", + "CFLAGS='-fPIC'", + "./configure", + f"--prefix={build_dir}", + "--enable-build-mode=production", + "--enable-parallel", + "--enable-shared=no", + "--enable-tests", + "--enable-tools", + ) + utils.run("make -j 8 install") + utils.export_variable("HDF5_ROOT", build_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) + parser.add_argument("--version", type=str, default=VERSION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/hpc2020/generate_build_help2man.py b/hpc2020/generate_build_help2man.py new file mode 100644 index 0000000..7db2351 --- /dev/null +++ b/hpc2020/generate_build_help2man.py @@ -0,0 +1,43 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import utils + +# >>> config: start +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +ROOT_DIR: str = f"/home/{os.getlogin()}" +VERSION: str = "1.49.3" +# >>> config: endi + + +def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str): + with utils.batch_file(prefix="build_help2man"): + utils.module_purge(force=True) + utils.load_env(env) + root_dir = os.path.abspath(root_dir) + with utils.chdir(root_dir): + utils.run("mkdir -p help2man") + branch = f"master" + utils.run( + f"git clone --branch={branch} " + f"https://github.com/Distrotech/help2man.git help2man/{version}" + ) + with utils.chdir(f"help2man/{version}"): + build_dir = os.path.join(root_dir, f"help2man/{version}/build/{env}") + utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") + utils.run("make -j 8 install") + utils.append_to_path("PATH", f"{build_dir}/bin") + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) + parser.add_argument("--version", type=str, default=VERSION) + args = parser.parse_args() + core(**args.__dict__) \ No newline at end of file diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py new file mode 100755 index 0000000..19e42a6 --- /dev/null +++ b/hpc2020/generate_build_netcdf.py @@ -0,0 +1,74 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import utils + + +# >>> config: start +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +HDF5_VERSION: str = "1.14.2" +ROOT_DIR: str = f"/home/{os.getlogin()}" +VERSION: str = "4.9.2" +# >>> config: end + + +def core( + env: defs.ProgrammingEnvironment, + partition: defs.Partition, + hdf5_version: str, + root_dir: str, + version: str, +): + with utils.batch_file(prefix="build_netcdf"): + utils.module_purge(force=True) + utils.load_env(env) + utils.module_load("gcc/11.2.0") + utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") + utils.module_load("openmpi") + root_dir = os.path.abspath(root_dir) + hdf5_root = os.path.join(root_dir, "hdf5", hdf5_version, "build", env) + utils.export_variable("HDF5_ROOT", hdf5_root) + with utils.chdir(root_dir): + os.makedirs("netcdf-c", exist_ok=True) + branch = f"v{version}" + utils.run( + f"git clone --branch={branch} --depth=1 " + f"https://github.com/Unidata/netcdf-c.git netcdf-c/{version}" + ) + with utils.chdir(f"netcdf-c/{version}"): + utils.run("autoupdate") + utils.run("autoreconf -if") + utils.run("rm -rf build") + build_dir = os.path.join(root_dir, f"netcdf-c/{version}/build/{env}") + hdf5_include_dir = os.path.join(hdf5_root, "include") + hdf5_lib_dir = os.path.join(hdf5_root, "lib") + utils.run( + "CC=mpicc", + "CXX=mpicxx", + f"CFLAGS='-fPIC -I{hdf5_include_dir}'", + f"CPPFLAGS='-fPIC -I{hdf5_include_dir}'", + f"LDFLAGS='-fPIC -L{hdf5_lib_dir}'", + "LIBS=-ldl", + "./configure", + f"--prefix={build_dir}", + "--disable-shared", + "--enable-parallel-tests", + ) + utils.run("make -j 8 install") + utils.export_variable("NETCDF_ROOT", build_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) + parser.add_argument("--version", type=str, default=VERSION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/hpc2020/generate_prepare_mpi.py b/hpc2020/generate_prepare_mpi.py new file mode 100755 index 0000000..61c8aba --- /dev/null +++ b/hpc2020/generate_prepare_mpi.py @@ -0,0 +1,37 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse + +import defs +import utils + + +# >>> config: start +PARTITION: defs.Partition = "gpu" +# >>> config: end + + +def core(partition: defs.Partition) -> str: + with utils.batch_file(prefix="prepare_mpi") as (f, fname): + # configure MPICH + # utils.export_variable("MPICH_CRAY_OPT_THREAD_SYNC", 1) + # utils.export_variable("MPICH_GNI_USE_UNASSIGNED_CPUS", "enabled") + # utils.export_variable("MPICH_MAX_THREAD_SAFETY", "multiple") + # utils.export_variable("MPICH_NEMESIS_ASYNC_PROGRESS", "MC") + # utils.export_variable("MPICH_NEMESIS_ON_NODE_ASYNC_OPT", 1) + # utils.export_variable("MPICH_OPTIMIZED_MEMCPY", 2) + utils.export_variable("MPICH_MAX_THREAD_SAFETY", "multiple") + if partition == "gpu": + utils.export_variable("MPICH_GPU_SUPPORT_ENABLED", 1) + utils.export_variable("MPICH_RDMA_ENABLED_CUDA", 1) + else: + utils.export_variable("MPICH_GPU_SUPPORT_ENABLED", 0) + return fname + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Configure MPICH.") + parser.add_argument("--partition", type=str, default=PARTITION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py new file mode 100755 index 0000000..1bc4f9a --- /dev/null +++ b/hpc2020/generate_prepare_pmapl.py @@ -0,0 +1,75 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +from __future__ import annotations +import argparse +import os + +import defs +import generate_prepare_mpi +import utils + + +# >>> config: start +BRANCH: str = "main" +ENV: defs.ProgrammingEnvironment = "gnu" +PARTITION: defs.Partition = "gpu" +# >>> config: end + + +def core(branch: str, env: defs.ProgrammingEnvironment, partition: defs.Partition) -> str: + with utils.batch_file(prefix="prepare_pmapl") as (f, fname): + # clear environment + utils.module_purge(force=True) + + # load relevant modules + utils.load_env(env) + utils.module_load("gcc/11.2.0", "boost", "openmpi", "cmake", "python3/3.10.10-01", "cmake") + utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") + if partition == "gpu": + utils.module_load("nvidia/22.11") + + # set path to PMAP code + pwd = os.environ.get("SCRATCH", os.path.curdir) + pmapl_dir = os.path.join(pwd, "pmapl", branch) + assert os.path.exists(pmapl_dir) + utils.export_variable("PMAPL", pmapl_dir) + pmapl_venv_dir = os.path.join(pmapl_dir, "venv", env) + utils.export_variable("PMAPL_VENV", pmapl_venv_dir) + + # low-level GT4Py, DaCe and GHEX config + # gt_cache_root = os.path.join(pmapl_dir, "gt_cache", env) + gt_cache_root = os.path.join(pwd, "pmapl", "gt_cache", env) + utils.export_variable("GT_CACHE_ROOT", gt_cache_root) + utils.export_variable("GT_CACHE_DIR_NAME", ".gt_cache") + utils.export_variable("DACE_CONFIG", os.path.join(gt_cache_root, ".dace.conf")) + + # configure MPICH + prepare_mpi_fname = generate_prepare_mpi.core(partition) + utils.run(f". {prepare_mpi_fname}") + + # set/fix CUDA-related variables + if partition == "gpu": + utils.setup_cuda() + + # path to custom build of HDF5 and NetCDF-C + home_dir = os.environ.get("HOME", f"/home/{os.getlogin()}") + utils.export_variable("HDF5_ROOT", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) + utils.export_variable("HDF5_DIR", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) + utils.export_variable("NETCDF_ROOT", os.path.join(home_dir, f"netcdf-c/4.9.2/build/{env}")) + utils.export_variable("NETCDF4_DIR", os.path.join(home_dir, f"netcdf-c/4.9.2/build/{env}")) + + # jump into project source directory and activate virtual environment (if it already exists) + with utils.chdir(pmapl_dir, restore=False): + if os.path.exists(pmapl_venv_dir): + utils.run(f"source {pmapl_venv_dir}/bin/activate") + + return fname + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--branch", type=str, default=BRANCH) + parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--partition", type=str, default=PARTITION) + args = parser.parse_args() + core(**args.__dict__) diff --git a/hpc2020/utils.py b/hpc2020/utils.py new file mode 100644 index 0000000..cc12168 --- /dev/null +++ b/hpc2020/utils.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations +import contextlib +import dataclasses +import os +import subprocess +import tempfile +import typing + +import defs + + +BATCH_DIRECTORY_REGISTRY = [] +BATCH_FILE_REGISTRY = [] + + +@contextlib.contextmanager +def batch_directory(): + os.makedirs("_tmp", exist_ok=True) + try: + if len(BATCH_DIRECTORY_REGISTRY) > 0: + final_cleanup = False + yield BATCH_DIRECTORY_REGISTRY[-1] + else: + final_cleanup = True + dirname = os.path.abspath(tempfile.mkdtemp(dir="_tmp")) + BATCH_DIRECTORY_REGISTRY.append(dirname) + os.makedirs(dirname, exist_ok=True) + print(f"py-hpc-scripts: create {dirname}") + yield dirname + finally: + if final_cleanup: + BATCH_DIRECTORY_REGISTRY.pop() + + +@contextlib.contextmanager +def batch_file(prefix: typing.Optional[str] = None): + if len(BATCH_DIRECTORY_REGISTRY) > 0: + fname = os.path.abspath(os.path.join(BATCH_DIRECTORY_REGISTRY[-1], prefix + ".sh")) + else: + # os.makedirs("_tmp", exist_ok=True) + # fname = os.path.abspath(tempfile.mktemp(prefix=prefix + "_", suffix=".sh", dir="_tmp")) + fname = os.path.abspath(prefix + ".sh") + + try: + with open(fname, "w") as f: + BATCH_FILE_REGISTRY.append(f) + f.write("#!/bin/bash -l\n\n") + yield f, fname + finally: + print(f"py-hpc-scripts: write {fname}") + BATCH_FILE_REGISTRY.pop() + + +def run(*args: str) -> None: + split_args = [item for arg in args for item in arg.split(" ")] + command = " ".join(split_args) + if len(BATCH_FILE_REGISTRY) > 0: + BATCH_FILE_REGISTRY[-1].write(command + "\n") + else: + subprocess.run(command, capture_output=False, shell=True) + + +def module_purge(force: bool = False) -> None: + run(f"module{' --force ' if force else ' '}purge") + + +def module_load(*module_names: str) -> None: + for module_name in module_names: + run(f"module load {module_name}") + + +class InvalidArgumentError(Exception): + def __init__(self, parameter: str, token: str, options: list[str]): + options = [f"`{opt}`" for opt in options] + msg = ( + f"Invalid value `{token}` for parameter `{parameter}`. " + f"Available options: {', '.join(options)}." + ) + super().__init__(msg) + + +@contextlib.contextmanager +def check_argument(parameter, token, options): + if token not in options: + raise InvalidArgumentError(parameter, token, options) + try: + yield token + finally: + pass + + + +def load_env(env: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + module_load("prgenv/gnu") + + +def export_variable(name: str, value: typing.Any) -> None: + run(f"export {name}={str(value)}") + + +def append_to_path( name: str, value: typing.Any) -> None: + run(f"{name}={str(value)}:${name}") + + +def setup_cuda(): + run("NVCC_PATH=$(which nvcc)") + run("CUDA_PATH=$(echo $NVCC_PATH | sed -e 's/\/bin\/nvcc//g')") + export_variable("CUDA_HOME", "$CUDA_PATH") + export_variable("NVHPC_CUDA_HOME", "$CUDA_PATH") + export_variable("LD_LIBRARY_PATH", "$CUDA_PATH/lib64:$LD_LIBRARY_PATH") + + +@contextlib.contextmanager +def chdir(dirname: str, restore: bool = True) -> None: + try: + run(f"pushd {dirname}") + yield None + finally: + if restore: + run("popd") + + +@dataclasses.dataclass +class ThreadsLayout: + num_nodes: int + num_tasks_per_node: int + num_threads_per_task: int + + From 57970cf7fa4c325e8a8ca48896865c7d7f25ec1d Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:05:54 +0200 Subject: [PATCH 03/22] Add pre-commit config. --- .pre-commit-config.yaml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1d6a8a7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-ast + - id: check-case-conflict + - id: check-json + - id: check-merge-conflict + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: fix-encoding-pragma + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: python-check-blanket-noqa + - id: python-check-blanket-type-ignore + - id: python-use-type-annotations + - id: rst-backticks +- repo: https://github.com/psf/black + rev: 24.2.0 + hooks: + - id: black +- repo: https://github.com/PyCQA/autoflake + rev: v2.2.1 + hooks: + - id: autoflake + args: [--in-place, --remove-all-unused-imports, --ignore-init-module-imports] From 8fd500fe05e23fe1aff96025cf76d06f62ddf69f Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:06:06 +0200 Subject: [PATCH 04/22] Review bashrc. --- hpc2020/.bashrc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hpc2020/.bashrc b/hpc2020/.bashrc index ab4a921..f061062 100644 --- a/hpc2020/.bashrc +++ b/hpc2020/.bashrc @@ -95,12 +95,16 @@ alias l='ls -CF' export LC_ALL=C.UTF-8 export LANG=C.UTF-8 +# nvim +alias vim='/home/"$USER"/neovim/0.9.5/install/bin/nvim' +export VIMRUNTIME=/home/"$USER"/neovim/0.9.5/install/share/nvim/runtime + # slurm shortcuts and settings alias sb='sbatch' alias sc='scancel' alias sq='squeue -u $USER' alias sr='srun' -export SQUEUE_FORMAT="%.9i %.50j %.15u %.15q %.15T %.10M %.10l %.5D %.13f %R" +export SQUEUE_FORMAT="%.9i %.60j %.10u %.10q %.15T %.10M %.10l %.5D %.13f %R" -PATH=~/help2man/1.49.3/build/gnu/bin/:$PATH -PATH=~/autoconf/2.72/build/gnu/bin/:$PATH \ No newline at end of file +# update path with manually built software +export PATH="$HPCPERM"/autoconf/2.72/build/gnu/bin/:"$HPCPERM"/help2man/build/gnu/bin/:$PATH From 3e5e9cb30d8e86e4a572ee0ac5394f7ece3bb90f Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:12:55 +0200 Subject: [PATCH 05/22] Review defs. --- hpc2020/defs.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hpc2020/defs.py b/hpc2020/defs.py index 74ff9df..28f4e3f 100644 --- a/hpc2020/defs.py +++ b/hpc2020/defs.py @@ -1,8 +1,13 @@ # -*- coding: utf-8 -*- +import os import typing FloatingPointPrecision = typing.Literal["double", "single"] -Partition = typing.Literal["gpu"] -ProgrammingEnvironment = typing.Literal["gnu"] +MPI = typing.Literal["hpcx", "intelmpi", "openmpi"] +Partition = typing.Literal["gpu", "par"] +ProgrammingEnvironment = typing.Literal["gnu", "intel"] -valid_programming_environments = typing.get_args(ProgrammingEnvironment) \ No newline at end of file +valid_mpi_libraries = typing.get_args(MPI) +valid_programming_environments = typing.get_args(ProgrammingEnvironment) + +root_dir = os.environ.get("HPCPERM", f"/home/{os.getlogin()}") From 540dc855d415928e923274e85b5869f585947081 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:14:45 +0200 Subject: [PATCH 06/22] Review utils. --- hpc2020/utils.py | 57 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/hpc2020/utils.py b/hpc2020/utils.py index cc12168..8b6ce49 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -90,26 +90,49 @@ def check_argument(parameter, token, options): pass - def load_env(env: str) -> None: with check_argument("env", env, defs.valid_programming_environments): - module_load("prgenv/gnu") - - -def export_variable(name: str, value: typing.Any) -> None: - run(f"export {name}={str(value)}") - - -def append_to_path( name: str, value: typing.Any) -> None: - run(f"{name}={str(value)}:${name}") + if env == "gnu": + module_load("prgenv/gnu", "gcc/11.2.0") + cc, cxx, fc = "gcc", "g++", "gfortran" + export_variable( + "LD_LIBRARY_PATH", "/usr/local/apps/gcc/11.2.0/lib64", prepend_value=True + ) + else: + module_load("prgenv/intel") + cc, cxx, fc = "icc", "icpc", "ifort" + export_variable("CC", cc) + export_variable("CXX", cxx) + export_variable("FC", fc) -def setup_cuda(): - run("NVCC_PATH=$(which nvcc)") - run("CUDA_PATH=$(echo $NVCC_PATH | sed -e 's/\/bin\/nvcc//g')") - export_variable("CUDA_HOME", "$CUDA_PATH") - export_variable("NVHPC_CUDA_HOME", "$CUDA_PATH") - export_variable("LD_LIBRARY_PATH", "$CUDA_PATH/lib64:$LD_LIBRARY_PATH") +def load_mpi(env: str, mpi: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + with check_argument("mpi", mpi, defs.valid_mpi_libraries): + cc, cxx, fc = "mpicc", "mpicxx", "mpifort" + if mpi == "hpcx": + module_load("hpcx-openmpi/2.10.0") + elif mpi == "intel-mpi": + module_load("intel-mpi/2023.2.0") + if env == "intel": + cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" + else: + cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" + else: + module_load("openmpi/4.1.1.1") + export_variable("CC", cc) + export_variable("MPICC", cc) + export_variable("CXX", cxx) + export_variable("MPICXX", cxx) + export_variable("FC", fc) + export_variable("MPIFC", fc) + + +def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: + cmd = f"export {name}={str(value)}" + if prepend_value: + cmd += f":${name}" + run(cmd) @contextlib.contextmanager @@ -127,5 +150,3 @@ class ThreadsLayout: num_nodes: int num_tasks_per_node: int num_threads_per_task: int - - From 0acce4b835115226663f1a071a5e0be81e194fde Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:15:19 +0200 Subject: [PATCH 07/22] Review generate_build_*.py scripts. --- hpc2020/generate_build_autoconf.py | 13 +++++-------- hpc2020/generate_build_hdf5.py | 24 ++++++++++-------------- hpc2020/generate_build_help2man.py | 25 ++++++++++--------------- hpc2020/generate_build_netcdf.py | 27 +++++++++------------------ 4 files changed, 34 insertions(+), 55 deletions(-) diff --git a/hpc2020/generate_build_autoconf.py b/hpc2020/generate_build_autoconf.py index 554856c..4112ebf 100644 --- a/hpc2020/generate_build_autoconf.py +++ b/hpc2020/generate_build_autoconf.py @@ -10,37 +10,34 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -PARTITION: defs.Partition = "gpu" -ROOT_DIR: str = f"/home/{os.getlogin()}" +ROOT_DIR: str = defs.root_dir VERSION: str = "2.72" # >>> config: end -def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str): +def core(env: defs.ProgrammingEnvironment, root_dir: str, version: str): with utils.batch_file(prefix="build_autoconf"): utils.module_purge(force=True) utils.load_env(env) root_dir = os.path.abspath(root_dir) with utils.chdir(root_dir): utils.run("mkdir -p autoconf") - branch = f"v{version}" utils.run( - f"git clone --branch={branch} " + f"git clone --branch=v{version} " f"http://git.sv.gnu.org/r/autoconf.git autoconf/{version}" ) with utils.chdir(f"autoconf/{version}"): utils.run("./bootstrap") build_dir = os.path.join(root_dir, f"autoconf/{version}/build/{env}") - utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") + utils.run("./configure", f"--prefix={build_dir}") utils.run("make -j 8 install") utils.export_variable("AUTOCONF_ROOT", build_dir) - utils.append_to_path("PATH", f"{build_dir}/bin") + utils.export_variable("PATH", f"{build_dir}/bin", prepend_value=True) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) - parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() diff --git a/hpc2020/generate_build_hdf5.py b/hpc2020/generate_build_hdf5.py index fa1f541..a00e963 100755 --- a/hpc2020/generate_build_hdf5.py +++ b/hpc2020/generate_build_hdf5.py @@ -10,25 +10,23 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -PARTITION: defs.Partition = "gpu" -ROOT_DIR: str = f"/home/{os.getlogin()}" -VERSION: str = "1.14.2" +MPI: defs.MPI = "openmpi" +ROOT_DIR: str = defs.root_dir +VERSION: str = "1.14.4.2" # >>> config: end -def core( - env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str -): +def core(env: defs.ProgrammingEnvironment, mpi: defs.MPI, root_dir: str, version: str): with utils.batch_file(prefix="build_hdf5"): utils.module_purge(force=True) utils.load_env(env) - utils.module_load("gcc/11.2.0") - utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") - utils.module_load("openmpi") + utils.load_mpi(env, mpi) root_dir = os.path.abspath(root_dir) with utils.chdir(root_dir): utils.run("mkdir -p hdf5") - branch = f"hdf5-{version.replace('.', '_')}" + branch = ( + f"hdf5-{version.replace('.', '_')}" if version < "1.14.4" else f"hdf5_{version}" + ) utils.run( f"git clone --branch={branch} --depth=1 " f"https://github.com/HDFGroup/hdf5.git hdf5/{version}" @@ -36,10 +34,8 @@ def core( with utils.chdir(f"hdf5/{version}"): utils.run("chmod +x autogen.sh") utils.run("./autogen.sh") - build_dir = os.path.join(root_dir, f"hdf5/{version}/build/{env}") + build_dir = os.path.join(root_dir, f"hdf5/{version}/build/{env}/{mpi}") utils.run( - "CC=mpicc", - "CXX=mpicxx", "CFLAGS='-fPIC'", "./configure", f"--prefix={build_dir}", @@ -56,7 +52,7 @@ def core( if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) - parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--mpi", type=str, default=MPI) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() diff --git a/hpc2020/generate_build_help2man.py b/hpc2020/generate_build_help2man.py index 7db2351..7465f40 100644 --- a/hpc2020/generate_build_help2man.py +++ b/hpc2020/generate_build_help2man.py @@ -9,35 +9,30 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -PARTITION: defs.Partition = "gpu" -ROOT_DIR: str = f"/home/{os.getlogin()}" -VERSION: str = "1.49.3" -# >>> config: endi +ROOT_DIR: str = defs.root_dir +# >>> config: end -def core(env: defs.ProgrammingEnvironment, partition: defs.Partition, root_dir: str, version: str): +def core(env: defs.ProgrammingEnvironment, root_dir: str): with utils.batch_file(prefix="build_help2man"): utils.module_purge(force=True) utils.load_env(env) root_dir = os.path.abspath(root_dir) with utils.chdir(root_dir): utils.run("mkdir -p help2man") - branch = f"master" utils.run( - f"git clone --branch={branch} " - f"https://github.com/Distrotech/help2man.git help2man/{version}" + f"git clone --branch=master " f"https://github.com/Distrotech/help2man.git help2man" ) - with utils.chdir(f"help2man/{version}"): - build_dir = os.path.join(root_dir, f"help2man/{version}/build/{env}") - utils.run("CC=cc", "CXX=CC", "./configure", f"--prefix={build_dir}") + with utils.chdir(f"help2man"): + build_dir = os.path.join(root_dir, f"help2man/build/{env}") + utils.run("./configure", f"--prefix={build_dir}") utils.run("make -j 8 install") - utils.append_to_path("PATH", f"{build_dir}/bin") + utils.export_variable("PATH", f"{build_dir}/bin", prepend_value=True) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) - parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) - parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() - core(**args.__dict__) \ No newline at end of file + core(**args.__dict__) diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py index 19e42a6..82343ca 100755 --- a/hpc2020/generate_build_netcdf.py +++ b/hpc2020/generate_build_netcdf.py @@ -10,46 +10,37 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -PARTITION: defs.Partition = "gpu" -HDF5_VERSION: str = "1.14.2" -ROOT_DIR: str = f"/home/{os.getlogin()}" +HDF5_VERSION: str = "1.14.4.2" +MPI: defs.MPI = "openmpi" +ROOT_DIR: str = defs.root_dir VERSION: str = "4.9.2" # >>> config: end def core( - env: defs.ProgrammingEnvironment, - partition: defs.Partition, - hdf5_version: str, - root_dir: str, - version: str, + env: defs.ProgrammingEnvironment, hdf5_version: str, mpi: defs.MPI, root_dir: str, version: str ): with utils.batch_file(prefix="build_netcdf"): utils.module_purge(force=True) utils.load_env(env) - utils.module_load("gcc/11.2.0") - utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") - utils.module_load("openmpi") + utils.load_mpi(env, mpi) root_dir = os.path.abspath(root_dir) - hdf5_root = os.path.join(root_dir, "hdf5", hdf5_version, "build", env) + hdf5_root = os.path.join(root_dir, "hdf5", hdf5_version, "build", env, mpi) utils.export_variable("HDF5_ROOT", hdf5_root) with utils.chdir(root_dir): os.makedirs("netcdf-c", exist_ok=True) - branch = f"v{version}" utils.run( - f"git clone --branch={branch} --depth=1 " + f"git clone --branch=v{version} --depth=1 " f"https://github.com/Unidata/netcdf-c.git netcdf-c/{version}" ) with utils.chdir(f"netcdf-c/{version}"): utils.run("autoupdate") utils.run("autoreconf -if") utils.run("rm -rf build") - build_dir = os.path.join(root_dir, f"netcdf-c/{version}/build/{env}") + build_dir = os.path.join(root_dir, f"netcdf-c/{version}/build/{env}/{mpi}") hdf5_include_dir = os.path.join(hdf5_root, "include") hdf5_lib_dir = os.path.join(hdf5_root, "lib") utils.run( - "CC=mpicc", - "CXX=mpicxx", f"CFLAGS='-fPIC -I{hdf5_include_dir}'", f"CPPFLAGS='-fPIC -I{hdf5_include_dir}'", f"LDFLAGS='-fPIC -L{hdf5_lib_dir}'", @@ -66,8 +57,8 @@ def core( if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) - parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) + parser.add_argument("--mpi", type=str, default=MPI) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() From 956cc3eb4edccd88047d492f20e1edf5bcb31dcc Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:15:34 +0200 Subject: [PATCH 08/22] Review generate_prepare_pmapl.py. --- hpc2020/generate_prepare_mpi.py | 37 ------------------- hpc2020/generate_prepare_pmapl.py | 60 ++++++++++++++++++------------- 2 files changed, 36 insertions(+), 61 deletions(-) delete mode 100755 hpc2020/generate_prepare_mpi.py diff --git a/hpc2020/generate_prepare_mpi.py b/hpc2020/generate_prepare_mpi.py deleted file mode 100755 index 61c8aba..0000000 --- a/hpc2020/generate_prepare_mpi.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/local/apps/python3/3.11.8-01/bin/python3 -# -*- coding: utf-8 -*- -from __future__ import annotations -import argparse - -import defs -import utils - - -# >>> config: start -PARTITION: defs.Partition = "gpu" -# >>> config: end - - -def core(partition: defs.Partition) -> str: - with utils.batch_file(prefix="prepare_mpi") as (f, fname): - # configure MPICH - # utils.export_variable("MPICH_CRAY_OPT_THREAD_SYNC", 1) - # utils.export_variable("MPICH_GNI_USE_UNASSIGNED_CPUS", "enabled") - # utils.export_variable("MPICH_MAX_THREAD_SAFETY", "multiple") - # utils.export_variable("MPICH_NEMESIS_ASYNC_PROGRESS", "MC") - # utils.export_variable("MPICH_NEMESIS_ON_NODE_ASYNC_OPT", 1) - # utils.export_variable("MPICH_OPTIMIZED_MEMCPY", 2) - utils.export_variable("MPICH_MAX_THREAD_SAFETY", "multiple") - if partition == "gpu": - utils.export_variable("MPICH_GPU_SUPPORT_ENABLED", 1) - utils.export_variable("MPICH_RDMA_ENABLED_CUDA", 1) - else: - utils.export_variable("MPICH_GPU_SUPPORT_ENABLED", 0) - return fname - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Configure MPICH.") - parser.add_argument("--partition", type=str, default=PARTITION) - args = parser.parse_args() - core(**args.__dict__) diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py index 1bc4f9a..b335b00 100755 --- a/hpc2020/generate_prepare_pmapl.py +++ b/hpc2020/generate_prepare_pmapl.py @@ -5,58 +5,67 @@ import os import defs -import generate_prepare_mpi import utils # >>> config: start BRANCH: str = "main" ENV: defs.ProgrammingEnvironment = "gnu" -PARTITION: defs.Partition = "gpu" +HDF5_VERSION: str = "1.14.4.2" +MPI: defs.MPI = "openmpi" +NETCDF_VERSION: str = "4.9.2" +PARTITION: defs.Partition = "par" # >>> config: end -def core(branch: str, env: defs.ProgrammingEnvironment, partition: defs.Partition) -> str: +def core( + branch: str, + env: defs.ProgrammingEnvironment, + hdf5_version: str, + mpi: defs.MPI, + netcdf_version: str, + partition: defs.Partition, +) -> str: with utils.batch_file(prefix="prepare_pmapl") as (f, fname): # clear environment utils.module_purge(force=True) # load relevant modules utils.load_env(env) - utils.module_load("gcc/11.2.0", "boost", "openmpi", "cmake", "python3/3.10.10-01", "cmake") - utils.append_to_path("LD_LIBRARY_PATH", f"/usr/local/apps/gcc/11.2.0/lib64") + utils.load_mpi(env, mpi) + utils.module_load("boost", "cmake", "python3/3.11.8-01") if partition == "gpu": - utils.module_load("nvidia/22.11") + utils.module_load("nvidia/22.11", "cuda/11.6") + # utils.setup_cuda() # set path to PMAP code - pwd = os.environ.get("SCRATCH", os.path.curdir) - pmapl_dir = os.path.join(pwd, "pmapl", branch) + pmapl_dir = os.path.join(defs.root_dir, "pmapl", branch) assert os.path.exists(pmapl_dir) utils.export_variable("PMAPL", pmapl_dir) - pmapl_venv_dir = os.path.join(pmapl_dir, "venv", env) + pmapl_venv_dir = os.path.join(pmapl_dir, "venv", partition, env, mpi) utils.export_variable("PMAPL_VENV", pmapl_venv_dir) # low-level GT4Py, DaCe and GHEX config - # gt_cache_root = os.path.join(pmapl_dir, "gt_cache", env) - gt_cache_root = os.path.join(pwd, "pmapl", "gt_cache", env) + gt_cache_root = os.path.join(defs.root_dir, "pmapl", "gt_cache", env) utils.export_variable("GT_CACHE_ROOT", gt_cache_root) utils.export_variable("GT_CACHE_DIR_NAME", ".gt_cache") utils.export_variable("DACE_CONFIG", os.path.join(gt_cache_root, ".dace.conf")) - # configure MPICH - prepare_mpi_fname = generate_prepare_mpi.core(partition) - utils.run(f". {prepare_mpi_fname}") - - # set/fix CUDA-related variables - if partition == "gpu": - utils.setup_cuda() - # path to custom build of HDF5 and NetCDF-C - home_dir = os.environ.get("HOME", f"/home/{os.getlogin()}") - utils.export_variable("HDF5_ROOT", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) - utils.export_variable("HDF5_DIR", os.path.join(home_dir, f"hdf5/1.14.2/build/{env}")) - utils.export_variable("NETCDF_ROOT", os.path.join(home_dir, f"netcdf-c/4.9.2/build/{env}")) - utils.export_variable("NETCDF4_DIR", os.path.join(home_dir, f"netcdf-c/4.9.2/build/{env}")) + utils.export_variable( + "HDF5_ROOT", os.path.join(defs.root_dir, f"hdf5/{hdf5_version}/build/{env}/{mpi}") + ) + utils.export_variable( + "HDF5_DIR", os.path.join(defs.root_dir, f"hdf5/{hdf5_version}/build/{env}/{mpi}") + ) + utils.export_variable( + "NETCDF_ROOT", + os.path.join(defs.root_dir, f"netcdf-c/{netcdf_version}/build/{env}/{mpi}"), + ) + utils.export_variable( + "NETCDF4_DIR", + os.path.join(defs.root_dir, f"netcdf-c/{netcdf_version}/build/{env}/{mpi}"), + ) # jump into project source directory and activate virtual environment (if it already exists) with utils.chdir(pmapl_dir, restore=False): @@ -70,6 +79,9 @@ def core(branch: str, env: defs.ProgrammingEnvironment, partition: defs.Partitio parser = argparse.ArgumentParser() parser.add_argument("--branch", type=str, default=BRANCH) parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) + parser.add_argument("--mpi", type=str, default=MPI) + parser.add_argument("--netcdf-version", type=str, default=NETCDF_VERSION) parser.add_argument("--partition", type=str, default=PARTITION) args = parser.parse_args() core(**args.__dict__) From b323c13caa9aab43df01f908d6287500897db37f Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:15:43 +0200 Subject: [PATCH 09/22] Add salloc.py. --- hpc2020/salloc.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100755 hpc2020/salloc.py diff --git a/hpc2020/salloc.py b/hpc2020/salloc.py new file mode 100755 index 0000000..e9161df --- /dev/null +++ b/hpc2020/salloc.py @@ -0,0 +1,38 @@ +#!/usr/local/apps/python3/3.11.8-01/bin/python3 +# -*- coding: utf-8 -*- +import argparse +import os + +import defs +import utils + + +# >>> config: start + +ACCOUNT: str = os.environ.get("ECACCOUNT", "") +NUM_NODES: int = 1 +PARTITION: defs.Partition = "gpu" +TIME: str = "01:00:00" +# >>> config: end + + +def core(account: int, num_nodes: int, partition: defs.Partition, time: str) -> None: + command = [ + f"salloc", + f"--account={account}", + f"--nodes={num_nodes}", + f"--partition={partition}", + f"--time={time}", + ] + command += ["--qos=ng", "--gpus=1"] if partition == "gpu" else ["--qos=np"] + utils.run(*command) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Get an allocation on the compute nodes.") + parser.add_argument("--account", type=str, default=ACCOUNT) + parser.add_argument("--num-nodes", type=int, default=NUM_NODES) + parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--time", type=str, default=TIME) + args = parser.parse_args() + core(**args.__dict__) From 0cdc1b67c1bfac24c4387f00250e22e20e75c073 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 14:16:00 +0200 Subject: [PATCH 10/22] Add requirements file. --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..251d2d2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +black==24.4.0 +pre-commit From 3941aea0cd8b2a8cf7d1d8611af976d3d021ad4a Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 22:25:31 +0200 Subject: [PATCH 11/22] Differentiate openmpi version based on partition. --- hpc2020/defs.py | 1 + hpc2020/generate_build_hdf5.py | 12 ++++++++-- hpc2020/generate_build_netcdf.py | 11 +++++++-- hpc2020/generate_prepare_pmapl.py | 4 ++-- hpc2020/utils.py | 38 +++++++++++++++++-------------- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/hpc2020/defs.py b/hpc2020/defs.py index 28f4e3f..d993e4a 100644 --- a/hpc2020/defs.py +++ b/hpc2020/defs.py @@ -8,6 +8,7 @@ ProgrammingEnvironment = typing.Literal["gnu", "intel"] valid_mpi_libraries = typing.get_args(MPI) +valid_partitions = typing.get_args(Partition) valid_programming_environments = typing.get_args(ProgrammingEnvironment) root_dir = os.environ.get("HPCPERM", f"/home/{os.getlogin()}") diff --git a/hpc2020/generate_build_hdf5.py b/hpc2020/generate_build_hdf5.py index a00e963..e058e87 100755 --- a/hpc2020/generate_build_hdf5.py +++ b/hpc2020/generate_build_hdf5.py @@ -11,16 +11,23 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" MPI: defs.MPI = "openmpi" +PARTITION: defs.Partition = "gpu" ROOT_DIR: str = defs.root_dir VERSION: str = "1.14.4.2" # >>> config: end -def core(env: defs.ProgrammingEnvironment, mpi: defs.MPI, root_dir: str, version: str): +def core( + env: defs.ProgrammingEnvironment, + mpi: defs.MPI, + partition: defs.Partition, + root_dir: str, + version: str, +): with utils.batch_file(prefix="build_hdf5"): utils.module_purge(force=True) utils.load_env(env) - utils.load_mpi(env, mpi) + utils.load_mpi(env, mpi, partition) root_dir = os.path.abspath(root_dir) with utils.chdir(root_dir): utils.run("mkdir -p hdf5") @@ -53,6 +60,7 @@ def core(env: defs.ProgrammingEnvironment, mpi: defs.MPI, root_dir: str, version parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) parser.add_argument("--mpi", type=str, default=MPI) + parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py index 82343ca..8262f57 100755 --- a/hpc2020/generate_build_netcdf.py +++ b/hpc2020/generate_build_netcdf.py @@ -12,18 +12,24 @@ ENV: defs.ProgrammingEnvironment = "gnu" HDF5_VERSION: str = "1.14.4.2" MPI: defs.MPI = "openmpi" +PARTITION: defs.Partition = "gpu" ROOT_DIR: str = defs.root_dir VERSION: str = "4.9.2" # >>> config: end def core( - env: defs.ProgrammingEnvironment, hdf5_version: str, mpi: defs.MPI, root_dir: str, version: str + env: defs.ProgrammingEnvironment, + hdf5_version: str, + mpi: defs.MPI, + partition: defs.Partition, + root_dir: str, + version: str, ): with utils.batch_file(prefix="build_netcdf"): utils.module_purge(force=True) utils.load_env(env) - utils.load_mpi(env, mpi) + utils.load_mpi(env, mpi, partition) root_dir = os.path.abspath(root_dir) hdf5_root = os.path.join(root_dir, "hdf5", hdf5_version, "build", env, mpi) utils.export_variable("HDF5_ROOT", hdf5_root) @@ -59,6 +65,7 @@ def core( parser.add_argument("--env", type=str, default=ENV) parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) parser.add_argument("--mpi", type=str, default=MPI) + parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py index b335b00..d33d924 100755 --- a/hpc2020/generate_prepare_pmapl.py +++ b/hpc2020/generate_prepare_pmapl.py @@ -14,7 +14,7 @@ HDF5_VERSION: str = "1.14.4.2" MPI: defs.MPI = "openmpi" NETCDF_VERSION: str = "4.9.2" -PARTITION: defs.Partition = "par" +PARTITION: defs.Partition = "gpu" # >>> config: end @@ -32,7 +32,7 @@ def core( # load relevant modules utils.load_env(env) - utils.load_mpi(env, mpi) + utils.load_mpi(env, mpi, partition) utils.module_load("boost", "cmake", "python3/3.11.8-01") if partition == "gpu": utils.module_load("nvidia/22.11", "cuda/11.6") diff --git a/hpc2020/utils.py b/hpc2020/utils.py index 8b6ce49..5e184f6 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -106,26 +106,30 @@ def load_env(env: str) -> None: export_variable("FC", fc) -def load_mpi(env: str, mpi: str) -> None: +def load_mpi(env: str, mpi: str, partition: str) -> None: with check_argument("env", env, defs.valid_programming_environments): with check_argument("mpi", mpi, defs.valid_mpi_libraries): - cc, cxx, fc = "mpicc", "mpicxx", "mpifort" - if mpi == "hpcx": - module_load("hpcx-openmpi/2.10.0") - elif mpi == "intel-mpi": - module_load("intel-mpi/2023.2.0") - if env == "intel": - cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" + with check_argument("partition", partition, defs.valid_partitions): + cc, cxx, fc = "mpicc", "mpicxx", "mpifort" + if mpi == "hpcx": + module_load("hpcx-openmpi/2.10.0") + elif mpi == "intel-mpi": + module_load("intel-mpi/2023.2.0") + if env == "intel": + cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" + else: + cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" else: - cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" - else: - module_load("openmpi/4.1.1.1") - export_variable("CC", cc) - export_variable("MPICC", cc) - export_variable("CXX", cxx) - export_variable("MPICXX", cxx) - export_variable("FC", fc) - export_variable("MPIFC", fc) + if partition == "gpu": + module_load("openmpi/4.1.5.4") + else: + module_load("openmpi/4.1.1.1") + export_variable("CC", cc) + export_variable("MPICC", cc) + export_variable("CXX", cxx) + export_variable("MPICXX", cxx) + export_variable("FC", fc) + export_variable("MPIFC", fc) def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: From 37c3595879142ee1a08155c244e69f34fa48f142 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Tue, 14 May 2024 22:25:55 +0200 Subject: [PATCH 12/22] Fix export PATH in bashrc. --- daint/.bashrc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/daint/.bashrc b/daint/.bashrc index 3ee3efe..873f3d6 100644 --- a/daint/.bashrc +++ b/daint/.bashrc @@ -141,8 +141,7 @@ export SQUEUE_FORMAT="%.9i %.50j %.15u %.5q %.15T %.10M %.10l %.5D %.13f %R" # hdf5 and netcdf export HDF5_ROOT=/users/"$USER"/hdf5/1.14.2/build/gnu export NETCDF_ROOT=/users/"$USER"/netcdf-c/4.9.2/build/gnu -PATH=~/help2man/1.49.3/build/gnu/bin/:$PATH -PATH=~/autoconf/2.72/build/gnu/bin/:$PATH +export PATH="$HOME"/autoconf/2.72/build/gnu/bin/:"$HOME"/help2man/1.49.3/build/gnu/bin/:$PATH # get node id of a salloc function get_node_id() { From 20cfa764e87c87462e03ce9a8b9a4d61fad0ab2a Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 10:48:40 +0200 Subject: [PATCH 13/22] Add update_path utility. --- hpc2020/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hpc2020/utils.py b/hpc2020/utils.py index 5e184f6..d855294 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -139,6 +139,10 @@ def export_variable(name: str, value: typing.Any, prepend_value: bool = False) - run(cmd) +def update_path(value: str): + export_variable("PATH", value, prepend_value=True) + + @contextlib.contextmanager def chdir(dirname: str, restore: bool = True) -> None: try: From fb62f6371e755b4553aac7c3ab4f9e7384b0b932 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 10:49:07 +0200 Subject: [PATCH 14/22] Add setup functions for better code reutilization. --- hpc2020/generate_build_autoconf.py | 56 +++++++++++++----- hpc2020/generate_build_hdf5.py | 91 +++++++++++++++++++++--------- hpc2020/generate_build_help2man.py | 42 ++++++++++---- hpc2020/generate_build_netcdf.py | 65 ++++++++++++++++++--- hpc2020/generate_prepare_pmapl.py | 56 +++++++++++------- 5 files changed, 230 insertions(+), 80 deletions(-) diff --git a/hpc2020/generate_build_autoconf.py b/hpc2020/generate_build_autoconf.py index 4112ebf..4481eec 100644 --- a/hpc2020/generate_build_autoconf.py +++ b/hpc2020/generate_build_autoconf.py @@ -5,39 +5,67 @@ import os import defs +import generate_build_help2man import utils # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" +COMPILER_VERSION: str = "13.2.0" ROOT_DIR: str = defs.root_dir VERSION: str = "2.72" # >>> config: end -def core(env: defs.ProgrammingEnvironment, root_dir: str, version: str): +def _setup(build_dir: str) -> None: + utils.export_variable("AUTOCONF_ROOT", build_dir) + utils.update_path(f"{build_dir}/bin") + + +def core( + env: defs.ProgrammingEnvironment, + compiler_version: str, + root_dir: str, + version: str, + _build: bool = True, +) -> str: with utils.batch_file(prefix="build_autoconf"): utils.module_purge(force=True) utils.load_env(env) + env_id = utils.load_compiler(env, compiler_version) root_dir = os.path.abspath(root_dir) - with utils.chdir(root_dir): - utils.run("mkdir -p autoconf") - utils.run( - f"git clone --branch=v{version} " - f"http://git.sv.gnu.org/r/autoconf.git autoconf/{version}" - ) - with utils.chdir(f"autoconf/{version}"): - utils.run("./bootstrap") - build_dir = os.path.join(root_dir, f"autoconf/{version}/build/{env}") - utils.run("./configure", f"--prefix={build_dir}") - utils.run("make -j 8 install") - utils.export_variable("AUTOCONF_ROOT", build_dir) - utils.export_variable("PATH", f"{build_dir}/bin", prepend_value=True) + build_dir = os.path.join(root_dir, f"autoconf/{version}/build/{env_id}") + + generate_build_help2man.setup(env, compiler_version, root_dir) + + if _build: + with utils.chdir(root_dir): + utils.run("mkdir -p autoconf") + utils.run( + f"git clone --branch=v{version} " + f"http://git.sv.gnu.org/r/autoconf.git autoconf/{version}" + ) + with utils.chdir(f"autoconf/{version}"): + utils.run("./bootstrap") + utils.run("./configure", f"--prefix={build_dir}") + utils.run("make -j 8 install") + + _setup(build_dir) + + return build_dir + + +def setup( + env: defs.ProgrammingEnvironment, compiler_version: str, root_dir: str, version: str +) -> None: + build_dir = core(env, compiler_version, root_dir, version, _build=False) + _setup(build_dir) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--compiler-version", type=str, default=COMPILER_VERSION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) parser.add_argument("--version", type=str, default=VERSION) args = parser.parse_args() diff --git a/hpc2020/generate_build_hdf5.py b/hpc2020/generate_build_hdf5.py index e058e87..c5cb30a 100755 --- a/hpc2020/generate_build_hdf5.py +++ b/hpc2020/generate_build_hdf5.py @@ -5,60 +5,99 @@ import os import defs +import generate_build_autoconf +import generate_build_help2man import utils # >>> config: start +AUTOCONF_VERSION: str = "2.72" ENV: defs.ProgrammingEnvironment = "gnu" -MPI: defs.MPI = "openmpi" +COMPILER_VERSION: str = "13.2.0" +MPI: defs.MPI = "hpcx" PARTITION: defs.Partition = "gpu" ROOT_DIR: str = defs.root_dir VERSION: str = "1.14.4.2" # >>> config: end +def _setup(build_dir: str) -> None: + utils.export_variable("HDF5_DIR", build_dir) + utils.export_variable("HDF5_ROOT", build_dir) + + def core( + autoconf_version: str, env: defs.ProgrammingEnvironment, + compiler_version: str, mpi: defs.MPI, partition: defs.Partition, root_dir: str, version: str, -): + _build: bool = True, +) -> str: with utils.batch_file(prefix="build_hdf5"): utils.module_purge(force=True) utils.load_env(env) - utils.load_mpi(env, mpi, partition) + env_id = utils.load_compiler(env, compiler_version) + mpi_id = utils.load_mpi(mpi, env, compiler_version, partition) root_dir = os.path.abspath(root_dir) - with utils.chdir(root_dir): - utils.run("mkdir -p hdf5") - branch = ( - f"hdf5-{version.replace('.', '_')}" if version < "1.14.4" else f"hdf5_{version}" - ) - utils.run( - f"git clone --branch={branch} --depth=1 " - f"https://github.com/HDFGroup/hdf5.git hdf5/{version}" - ) - with utils.chdir(f"hdf5/{version}"): - utils.run("chmod +x autogen.sh") - utils.run("./autogen.sh") - build_dir = os.path.join(root_dir, f"hdf5/{version}/build/{env}/{mpi}") + build_dir = os.path.join(root_dir, f"hdf5/{version}/build/{env_id}/{mpi_id}") + + generate_build_help2man.setup(env, compiler_version, root_dir) + generate_build_autoconf.setup(env, compiler_version, root_dir, autoconf_version) + + if _build: + with utils.chdir(root_dir): + utils.run("mkdir -p hdf5") + branch = ( + f"hdf5-{version.replace('.', '_')}" if version < "1.14.4" else f"hdf5_{version}" + ) utils.run( - "CFLAGS='-fPIC'", - "./configure", - f"--prefix={build_dir}", - "--enable-build-mode=production", - "--enable-parallel", - "--enable-shared=no", - "--enable-tests", - "--enable-tools", + f"git clone --branch={branch} --depth=1 " + f"https://github.com/HDFGroup/hdf5.git hdf5/{version}" ) - utils.run("make -j 8 install") - utils.export_variable("HDF5_ROOT", build_dir) + with utils.chdir(f"hdf5/{version}"): + utils.run("chmod +x autogen.sh") + utils.run("./autogen.sh") + utils.run( + "CFLAGS='-fPIC'", + "./configure", + f"--prefix={build_dir}", + "--enable-build-mode=production", + "--enable-parallel", + "--enable-shared=no", + "--enable-tests", + "--enable-tools", + ) + utils.run("make -j 8 install") + + _setup(build_dir) + + return build_dir + + +def setup( + autoconf_version: str, + env: defs.ProgrammingEnvironment, + compiler_version: str, + mpi: defs.MPI, + partition: defs.Partition, + root_dir: str, + version: str, +) -> str: + build_dir = core( + autoconf_version, env, compiler_version, mpi, partition, root_dir, version, _build=False + ) + _setup(build_dir) + return build_dir if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--autoconf-version", type=str, default=AUTOCONF_VERSION) parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--compiler-version", type=str, default=COMPILER_VERSION) parser.add_argument("--mpi", type=str, default=MPI) parser.add_argument("--partition", type=str, default=PARTITION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) diff --git a/hpc2020/generate_build_help2man.py b/hpc2020/generate_build_help2man.py index 7465f40..6a1541d 100644 --- a/hpc2020/generate_build_help2man.py +++ b/hpc2020/generate_build_help2man.py @@ -9,30 +9,50 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" +COMPILER_VERSION: str = "13.2.0" ROOT_DIR: str = defs.root_dir # >>> config: end -def core(env: defs.ProgrammingEnvironment, root_dir: str): +def _setup(build_dir: str) -> None: + utils.update_path(f"{build_dir}/bin") + + +def core( + env: defs.ProgrammingEnvironment, compiler_version: str, root_dir: str, _build: bool = True +) -> str: with utils.batch_file(prefix="build_help2man"): utils.module_purge(force=True) utils.load_env(env) + env_id = utils.load_compiler(env, compiler_version) root_dir = os.path.abspath(root_dir) - with utils.chdir(root_dir): - utils.run("mkdir -p help2man") - utils.run( - f"git clone --branch=master " f"https://github.com/Distrotech/help2man.git help2man" - ) - with utils.chdir(f"help2man"): - build_dir = os.path.join(root_dir, f"help2man/build/{env}") - utils.run("./configure", f"--prefix={build_dir}") - utils.run("make -j 8 install") - utils.export_variable("PATH", f"{build_dir}/bin", prepend_value=True) + build_dir = os.path.join(root_dir, f"help2man/build/{env_id}") + + if _build: + with utils.chdir(root_dir): + utils.run("mkdir -p help2man") + utils.run( + f"git clone --branch=master " + f"https://github.com/Distrotech/help2man.git help2man" + ) + with utils.chdir(f"help2man"): + utils.run("./configure", f"--prefix={build_dir}") + utils.run("make -j 8 install") + + _setup(build_dir) + + return build_dir + + +def setup(env: defs.ProgrammingEnvironment, compiler_version: str, root_dir: str) -> None: + build_dir = core(env, compiler_version, root_dir, _build=False) + _setup(build_dir) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--compiler-version", type=str, default=COMPILER_VERSION) parser.add_argument("--root-dir", type=str, default=ROOT_DIR) args = parser.parse_args() core(**args.__dict__) diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py index 8262f57..5ddf924 100755 --- a/hpc2020/generate_build_netcdf.py +++ b/hpc2020/generate_build_netcdf.py @@ -5,34 +5,54 @@ import os import defs +import generate_build_autoconf +import generate_build_hdf5 +import generate_build_help2man import utils # >>> config: start +AUTOCONF_VERSION: str = "2.72" ENV: defs.ProgrammingEnvironment = "gnu" +COMPILER_VERSION: str = "13.2.0" HDF5_VERSION: str = "1.14.4.2" -MPI: defs.MPI = "openmpi" +MPI: defs.MPI = "hpcx" PARTITION: defs.Partition = "gpu" ROOT_DIR: str = defs.root_dir VERSION: str = "4.9.2" # >>> config: end +def _setup(build_dir: str) -> None: + utils.export_variable("NETCDF_DIR", build_dir) + utils.export_variable("NETCDF_ROOT", build_dir) + + def core( + autoconf_version: str, env: defs.ProgrammingEnvironment, + compiler_version: str, hdf5_version: str, mpi: defs.MPI, partition: defs.Partition, root_dir: str, version: str, + _build: bool = True, ): with utils.batch_file(prefix="build_netcdf"): utils.module_purge(force=True) utils.load_env(env) - utils.load_mpi(env, mpi, partition) + env_id = utils.load_compiler(env, compiler_version) + mpi_id = utils.load_mpi(mpi, env, compiler_version, partition) root_dir = os.path.abspath(root_dir) - hdf5_root = os.path.join(root_dir, "hdf5", hdf5_version, "build", env, mpi) - utils.export_variable("HDF5_ROOT", hdf5_root) + build_dir = os.path.join(root_dir, f"netcdf-c/{version}/build/{env_id}/{mpi_id}") + + generate_build_help2man.setup(env, compiler_version, root_dir) + generate_build_autoconf.setup(env, compiler_version, root_dir, autoconf_version) + hdf5_build_dir = generate_build_hdf5.setup( + autoconf_version, env, compiler_version, mpi, partition, root_dir, hdf5_version + ) + with utils.chdir(root_dir): os.makedirs("netcdf-c", exist_ok=True) utils.run( @@ -43,9 +63,8 @@ def core( utils.run("autoupdate") utils.run("autoreconf -if") utils.run("rm -rf build") - build_dir = os.path.join(root_dir, f"netcdf-c/{version}/build/{env}/{mpi}") - hdf5_include_dir = os.path.join(hdf5_root, "include") - hdf5_lib_dir = os.path.join(hdf5_root, "lib") + hdf5_include_dir = os.path.join(hdf5_build_dir, "include") + hdf5_lib_dir = os.path.join(hdf5_build_dir, "lib") utils.run( f"CFLAGS='-fPIC -I{hdf5_include_dir}'", f"CPPFLAGS='-fPIC -I{hdf5_include_dir}'", @@ -57,12 +76,42 @@ def core( "--enable-parallel-tests", ) utils.run("make -j 8 install") - utils.export_variable("NETCDF_ROOT", build_dir) + + _setup(build_dir) + + return build_dir + + +def setup( + autoconf_version: str, + env: defs.ProgrammingEnvironment, + compiler_version: str, + hdf5_version: str, + mpi: defs.MPI, + partition: defs.Partition, + root_dir: str, + version: str, +) -> str: + build_dir = core( + autoconf_version, + env, + compiler_version, + hdf5_version, + mpi, + partition, + root_dir, + version, + _build=False, + ) + _setup(build_dir) + return build_dir if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--autoconf-version", type=str, default=AUTOCONF_VERSION) parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--compiler-version", type=str, default=COMPILER_VERSION) parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) parser.add_argument("--mpi", type=str, default=MPI) parser.add_argument("--partition", type=str, default=PARTITION) diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py index d33d924..06c4d8c 100755 --- a/hpc2020/generate_prepare_pmapl.py +++ b/hpc2020/generate_prepare_pmapl.py @@ -5,26 +5,36 @@ import os import defs +import generate_build_autoconf +import generate_build_hdf5 +import generate_build_help2man +import generate_build_netcdf import utils # >>> config: start +AUTOCONF_VERSION: str = "2.72" BRANCH: str = "main" ENV: defs.ProgrammingEnvironment = "gnu" +COMPILER_VERSION: str = "13.2.0" HDF5_VERSION: str = "1.14.4.2" -MPI: defs.MPI = "openmpi" +MPI: defs.MPI = "hpcx" NETCDF_VERSION: str = "4.9.2" PARTITION: defs.Partition = "gpu" +ROOT_DIR: str = defs.root_dir # >>> config: end def core( + autoconf_version: str, branch: str, env: defs.ProgrammingEnvironment, + compiler_version: str, hdf5_version: str, mpi: defs.MPI, netcdf_version: str, partition: defs.Partition, + root_dir: str, ) -> str: with utils.batch_file(prefix="prepare_pmapl") as (f, fname): # clear environment @@ -32,41 +42,42 @@ def core( # load relevant modules utils.load_env(env) - utils.load_mpi(env, mpi, partition) + env_id = utils.load_compiler(env, compiler_version) + mpi_id = utils.load_mpi(mpi, env, compiler_version, partition) utils.module_load("boost", "cmake", "python3/3.11.8-01") if partition == "gpu": utils.module_load("nvidia/22.11", "cuda/11.6") - # utils.setup_cuda() + + # set path to custom build of external dependencies + generate_build_help2man.setup(env, compiler_version, root_dir) + generate_build_autoconf.setup(env, compiler_version, root_dir, autoconf_version) + generate_build_hdf5.setup( + autoconf_version, env, compiler_version, mpi, partition, root_dir, hdf5_version + ) + generate_build_netcdf.setup( + autoconf_version, + env, + compiler_version, + hdf5_version, + mpi, + partition, + root_dir, + netcdf_version, + ) # set path to PMAP code pmapl_dir = os.path.join(defs.root_dir, "pmapl", branch) assert os.path.exists(pmapl_dir) utils.export_variable("PMAPL", pmapl_dir) - pmapl_venv_dir = os.path.join(pmapl_dir, "venv", partition, env, mpi) + pmapl_venv_dir = os.path.join(pmapl_dir, "venv", partition, env_id, mpi_id) utils.export_variable("PMAPL_VENV", pmapl_venv_dir) # low-level GT4Py, DaCe and GHEX config - gt_cache_root = os.path.join(defs.root_dir, "pmapl", "gt_cache", env) + gt_cache_root = os.path.join(defs.root_dir, "pmapl", "gt_cache", env_id) utils.export_variable("GT_CACHE_ROOT", gt_cache_root) utils.export_variable("GT_CACHE_DIR_NAME", ".gt_cache") utils.export_variable("DACE_CONFIG", os.path.join(gt_cache_root, ".dace.conf")) - # path to custom build of HDF5 and NetCDF-C - utils.export_variable( - "HDF5_ROOT", os.path.join(defs.root_dir, f"hdf5/{hdf5_version}/build/{env}/{mpi}") - ) - utils.export_variable( - "HDF5_DIR", os.path.join(defs.root_dir, f"hdf5/{hdf5_version}/build/{env}/{mpi}") - ) - utils.export_variable( - "NETCDF_ROOT", - os.path.join(defs.root_dir, f"netcdf-c/{netcdf_version}/build/{env}/{mpi}"), - ) - utils.export_variable( - "NETCDF4_DIR", - os.path.join(defs.root_dir, f"netcdf-c/{netcdf_version}/build/{env}/{mpi}"), - ) - # jump into project source directory and activate virtual environment (if it already exists) with utils.chdir(pmapl_dir, restore=False): if os.path.exists(pmapl_venv_dir): @@ -77,11 +88,14 @@ def core( if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--autoconf-version", type=str, default=AUTOCONF_VERSION) parser.add_argument("--branch", type=str, default=BRANCH) parser.add_argument("--env", type=str, default=ENV) + parser.add_argument("--compiler-version", type=str, default=COMPILER_VERSION) parser.add_argument("--hdf5-version", type=str, default=HDF5_VERSION) parser.add_argument("--mpi", type=str, default=MPI) parser.add_argument("--netcdf-version", type=str, default=NETCDF_VERSION) parser.add_argument("--partition", type=str, default=PARTITION) + parser.add_argument("--root-dir", type=str, default=ROOT_DIR) args = parser.parse_args() core(**args.__dict__) From 8a262d2e54a6ab3f0581502961092c0fafc7da29 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 10:49:26 +0200 Subject: [PATCH 15/22] Add load_compiler utility. --- hpc2020/utils.py | 54 +++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/hpc2020/utils.py b/hpc2020/utils.py index d855294..717e8af 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -91,45 +91,57 @@ def check_argument(parameter, token, options): def load_env(env: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + module_load(f"prgenv/{env}") + + +def load_compiler(env: str, compiler_version: str) -> str: with check_argument("env", env, defs.valid_programming_environments): if env == "gnu": - module_load("prgenv/gnu", "gcc/11.2.0") + module_load(f"gcc/{compiler_version}") cc, cxx, fc = "gcc", "g++", "gfortran" + env_id = f"gnu-{compiler_version}" export_variable( - "LD_LIBRARY_PATH", "/usr/local/apps/gcc/11.2.0/lib64", prepend_value=True + "LD_LIBRARY_PATH", + f"/usr/local/apps/gcc/{compiler_version}/lib64", + prepend_value=True, ) else: - module_load("prgenv/intel") + module_load(f"intel/{compiler_version}") cc, cxx, fc = "icc", "icpc", "ifort" - export_variable("CC", cc) - export_variable("CXX", cxx) - export_variable("FC", fc) + env_id = f"intel-{compiler_version}" + export_variable("CC", cc) + export_variable("CXX", cxx) + export_variable("FC", fc) + return env_id -def load_mpi(env: str, mpi: str, partition: str) -> None: - with check_argument("env", env, defs.valid_programming_environments): - with check_argument("mpi", mpi, defs.valid_mpi_libraries): +def load_mpi(mpi: str, env: str, compiler_version: str, partition: str) -> str: + with check_argument("mpi", mpi, defs.valid_mpi_libraries): + with check_argument("env", env, defs.valid_programming_environments): with check_argument("partition", partition, defs.valid_partitions): cc, cxx, fc = "mpicc", "mpicxx", "mpifort" if mpi == "hpcx": - module_load("hpcx-openmpi/2.10.0") + if env == "gnu" and compiler_version == "13.2.0": + module_name = "hpcx-openmpi/2.17.1" + else: + module_name = "hpcx-openmpi/2.10.0" elif mpi == "intel-mpi": - module_load("intel-mpi/2023.2.0") + module_name = "intel-mpi/2023.2.0" if env == "intel": cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" else: cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" else: - if partition == "gpu": - module_load("openmpi/4.1.5.4") - else: - module_load("openmpi/4.1.1.1") - export_variable("CC", cc) - export_variable("MPICC", cc) - export_variable("CXX", cxx) - export_variable("MPICXX", cxx) - export_variable("FC", fc) - export_variable("MPIFC", fc) + module_name = "openmpi/4.1.5.4" if partition == "gpu" else "openmpi/4.1.1.1" + module_load(module_name) + export_variable("CC", cc) + export_variable("MPICC", cc) + export_variable("CXX", cxx) + export_variable("MPICXX", cxx) + export_variable("FC", fc) + export_variable("MPIFC", fc) + return module_name.replace("/", "-") def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: From d51c10328f26dbc5d24439eceb91c329a74e89a9 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 13:34:14 +0200 Subject: [PATCH 16/22] Roll back to gcc/11.2.0. --- hpc2020/generate_build_autoconf.py | 2 +- hpc2020/generate_build_hdf5.py | 2 +- hpc2020/generate_build_help2man.py | 2 +- hpc2020/generate_build_netcdf.py | 2 +- hpc2020/generate_prepare_pmapl.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hpc2020/generate_build_autoconf.py b/hpc2020/generate_build_autoconf.py index 4481eec..f3ebca6 100644 --- a/hpc2020/generate_build_autoconf.py +++ b/hpc2020/generate_build_autoconf.py @@ -11,7 +11,7 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -COMPILER_VERSION: str = "13.2.0" +COMPILER_VERSION: str = "11.2.0" ROOT_DIR: str = defs.root_dir VERSION: str = "2.72" # >>> config: end diff --git a/hpc2020/generate_build_hdf5.py b/hpc2020/generate_build_hdf5.py index c5cb30a..e669b30 100755 --- a/hpc2020/generate_build_hdf5.py +++ b/hpc2020/generate_build_hdf5.py @@ -13,7 +13,7 @@ # >>> config: start AUTOCONF_VERSION: str = "2.72" ENV: defs.ProgrammingEnvironment = "gnu" -COMPILER_VERSION: str = "13.2.0" +COMPILER_VERSION: str = "11.2.0" MPI: defs.MPI = "hpcx" PARTITION: defs.Partition = "gpu" ROOT_DIR: str = defs.root_dir diff --git a/hpc2020/generate_build_help2man.py b/hpc2020/generate_build_help2man.py index 6a1541d..3aff0da 100644 --- a/hpc2020/generate_build_help2man.py +++ b/hpc2020/generate_build_help2man.py @@ -9,7 +9,7 @@ # >>> config: start ENV: defs.ProgrammingEnvironment = "gnu" -COMPILER_VERSION: str = "13.2.0" +COMPILER_VERSION: str = "11.2.0" ROOT_DIR: str = defs.root_dir # >>> config: end diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py index 5ddf924..6bec789 100755 --- a/hpc2020/generate_build_netcdf.py +++ b/hpc2020/generate_build_netcdf.py @@ -14,7 +14,7 @@ # >>> config: start AUTOCONF_VERSION: str = "2.72" ENV: defs.ProgrammingEnvironment = "gnu" -COMPILER_VERSION: str = "13.2.0" +COMPILER_VERSION: str = "11.2.0" HDF5_VERSION: str = "1.14.4.2" MPI: defs.MPI = "hpcx" PARTITION: defs.Partition = "gpu" diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py index 06c4d8c..590d607 100755 --- a/hpc2020/generate_prepare_pmapl.py +++ b/hpc2020/generate_prepare_pmapl.py @@ -16,7 +16,7 @@ AUTOCONF_VERSION: str = "2.72" BRANCH: str = "main" ENV: defs.ProgrammingEnvironment = "gnu" -COMPILER_VERSION: str = "13.2.0" +COMPILER_VERSION: str = "11.2.0" HDF5_VERSION: str = "1.14.4.2" MPI: defs.MPI = "hpcx" NETCDF_VERSION: str = "4.9.2" From f0362fb43cd6eaa102ea9361fd6367d1dfdfe228 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 13:34:33 +0200 Subject: [PATCH 17/22] Specify --cpus-per-task in salloc command. --- hpc2020/salloc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hpc2020/salloc.py b/hpc2020/salloc.py index e9161df..7958394 100755 --- a/hpc2020/salloc.py +++ b/hpc2020/salloc.py @@ -20,6 +20,7 @@ def core(account: int, num_nodes: int, partition: defs.Partition, time: str) -> command = [ f"salloc", f"--account={account}", + "--cpus-per-task=256", f"--nodes={num_nodes}", f"--partition={partition}", f"--time={time}", From 831d4ce5a6972a42fe4c66a8101d3603ff041a8d Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 13:34:45 +0200 Subject: [PATCH 18/22] Fix NETCD4_DIR. --- hpc2020/generate_build_netcdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpc2020/generate_build_netcdf.py b/hpc2020/generate_build_netcdf.py index 6bec789..ef4d737 100755 --- a/hpc2020/generate_build_netcdf.py +++ b/hpc2020/generate_build_netcdf.py @@ -24,7 +24,7 @@ def _setup(build_dir: str) -> None: - utils.export_variable("NETCDF_DIR", build_dir) + utils.export_variable("NETCDF4_DIR", build_dir) utils.export_variable("NETCDF_ROOT", build_dir) From 30b43b238331214d4b6729f34324ee0a8d246e32 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 15 May 2024 13:35:03 +0200 Subject: [PATCH 19/22] Add load_gpu_libraries utility. --- hpc2020/generate_prepare_pmapl.py | 2 +- hpc2020/utils.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/hpc2020/generate_prepare_pmapl.py b/hpc2020/generate_prepare_pmapl.py index 590d607..c7517a3 100755 --- a/hpc2020/generate_prepare_pmapl.py +++ b/hpc2020/generate_prepare_pmapl.py @@ -46,7 +46,7 @@ def core( mpi_id = utils.load_mpi(mpi, env, compiler_version, partition) utils.module_load("boost", "cmake", "python3/3.11.8-01") if partition == "gpu": - utils.module_load("nvidia/22.11", "cuda/11.6") + utils.load_gpu_libraries(env, compiler_version) # set path to custom build of external dependencies generate_build_help2man.setup(env, compiler_version, root_dir) diff --git a/hpc2020/utils.py b/hpc2020/utils.py index 717e8af..9c942b6 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -144,6 +144,14 @@ def load_mpi(mpi: str, env: str, compiler_version: str, partition: str) -> str: return module_name.replace("/", "-") +def load_gpu_libraries(env: str, compiler_version: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + if env == "gnu" and compiler_version == "13.2.0": + module_load("nvidia/24.1", "cuda/11.6") + else: + module_load("nvidia/22.11", "cuda/11.6") + + def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: cmd = f"export {name}={str(value)}" if prepend_value: From a1298144ccc84c9e91bcbac0225615c9789498cc Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 16 May 2024 09:26:43 +0200 Subject: [PATCH 20/22] Salloc asks for 4 GPUs. --- common/utils.py | 180 ++++++++++++++++++++++++++++++++++++++++++++++ hpc2020/salloc.py | 2 +- 2 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 common/utils.py diff --git a/common/utils.py b/common/utils.py new file mode 100644 index 0000000..9c942b6 --- /dev/null +++ b/common/utils.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations +import contextlib +import dataclasses +import os +import subprocess +import tempfile +import typing + +import defs + + +BATCH_DIRECTORY_REGISTRY = [] +BATCH_FILE_REGISTRY = [] + + +@contextlib.contextmanager +def batch_directory(): + os.makedirs("_tmp", exist_ok=True) + try: + if len(BATCH_DIRECTORY_REGISTRY) > 0: + final_cleanup = False + yield BATCH_DIRECTORY_REGISTRY[-1] + else: + final_cleanup = True + dirname = os.path.abspath(tempfile.mkdtemp(dir="_tmp")) + BATCH_DIRECTORY_REGISTRY.append(dirname) + os.makedirs(dirname, exist_ok=True) + print(f"py-hpc-scripts: create {dirname}") + yield dirname + finally: + if final_cleanup: + BATCH_DIRECTORY_REGISTRY.pop() + + +@contextlib.contextmanager +def batch_file(prefix: typing.Optional[str] = None): + if len(BATCH_DIRECTORY_REGISTRY) > 0: + fname = os.path.abspath(os.path.join(BATCH_DIRECTORY_REGISTRY[-1], prefix + ".sh")) + else: + # os.makedirs("_tmp", exist_ok=True) + # fname = os.path.abspath(tempfile.mktemp(prefix=prefix + "_", suffix=".sh", dir="_tmp")) + fname = os.path.abspath(prefix + ".sh") + + try: + with open(fname, "w") as f: + BATCH_FILE_REGISTRY.append(f) + f.write("#!/bin/bash -l\n\n") + yield f, fname + finally: + print(f"py-hpc-scripts: write {fname}") + BATCH_FILE_REGISTRY.pop() + + +def run(*args: str) -> None: + split_args = [item for arg in args for item in arg.split(" ")] + command = " ".join(split_args) + if len(BATCH_FILE_REGISTRY) > 0: + BATCH_FILE_REGISTRY[-1].write(command + "\n") + else: + subprocess.run(command, capture_output=False, shell=True) + + +def module_purge(force: bool = False) -> None: + run(f"module{' --force ' if force else ' '}purge") + + +def module_load(*module_names: str) -> None: + for module_name in module_names: + run(f"module load {module_name}") + + +class InvalidArgumentError(Exception): + def __init__(self, parameter: str, token: str, options: list[str]): + options = [f"`{opt}`" for opt in options] + msg = ( + f"Invalid value `{token}` for parameter `{parameter}`. " + f"Available options: {', '.join(options)}." + ) + super().__init__(msg) + + +@contextlib.contextmanager +def check_argument(parameter, token, options): + if token not in options: + raise InvalidArgumentError(parameter, token, options) + try: + yield token + finally: + pass + + +def load_env(env: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + module_load(f"prgenv/{env}") + + +def load_compiler(env: str, compiler_version: str) -> str: + with check_argument("env", env, defs.valid_programming_environments): + if env == "gnu": + module_load(f"gcc/{compiler_version}") + cc, cxx, fc = "gcc", "g++", "gfortran" + env_id = f"gnu-{compiler_version}" + export_variable( + "LD_LIBRARY_PATH", + f"/usr/local/apps/gcc/{compiler_version}/lib64", + prepend_value=True, + ) + else: + module_load(f"intel/{compiler_version}") + cc, cxx, fc = "icc", "icpc", "ifort" + env_id = f"intel-{compiler_version}" + export_variable("CC", cc) + export_variable("CXX", cxx) + export_variable("FC", fc) + return env_id + + +def load_mpi(mpi: str, env: str, compiler_version: str, partition: str) -> str: + with check_argument("mpi", mpi, defs.valid_mpi_libraries): + with check_argument("env", env, defs.valid_programming_environments): + with check_argument("partition", partition, defs.valid_partitions): + cc, cxx, fc = "mpicc", "mpicxx", "mpifort" + if mpi == "hpcx": + if env == "gnu" and compiler_version == "13.2.0": + module_name = "hpcx-openmpi/2.17.1" + else: + module_name = "hpcx-openmpi/2.10.0" + elif mpi == "intel-mpi": + module_name = "intel-mpi/2023.2.0" + if env == "intel": + cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" + else: + cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" + else: + module_name = "openmpi/4.1.5.4" if partition == "gpu" else "openmpi/4.1.1.1" + module_load(module_name) + export_variable("CC", cc) + export_variable("MPICC", cc) + export_variable("CXX", cxx) + export_variable("MPICXX", cxx) + export_variable("FC", fc) + export_variable("MPIFC", fc) + return module_name.replace("/", "-") + + +def load_gpu_libraries(env: str, compiler_version: str) -> None: + with check_argument("env", env, defs.valid_programming_environments): + if env == "gnu" and compiler_version == "13.2.0": + module_load("nvidia/24.1", "cuda/11.6") + else: + module_load("nvidia/22.11", "cuda/11.6") + + +def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: + cmd = f"export {name}={str(value)}" + if prepend_value: + cmd += f":${name}" + run(cmd) + + +def update_path(value: str): + export_variable("PATH", value, prepend_value=True) + + +@contextlib.contextmanager +def chdir(dirname: str, restore: bool = True) -> None: + try: + run(f"pushd {dirname}") + yield None + finally: + if restore: + run("popd") + + +@dataclasses.dataclass +class ThreadsLayout: + num_nodes: int + num_tasks_per_node: int + num_threads_per_task: int diff --git a/hpc2020/salloc.py b/hpc2020/salloc.py index 7958394..861a89a 100755 --- a/hpc2020/salloc.py +++ b/hpc2020/salloc.py @@ -25,7 +25,7 @@ def core(account: int, num_nodes: int, partition: defs.Partition, time: str) -> f"--partition={partition}", f"--time={time}", ] - command += ["--qos=ng", "--gpus=1"] if partition == "gpu" else ["--qos=np"] + command += ["--qos=ng", "--gpus=4"] if partition == "gpu" else ["--qos=np"] utils.run(*command) From 429e3b66597cba3b9dd42db8a59b89ed698f27a4 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 16 May 2024 09:27:24 +0200 Subject: [PATCH 21/22] Remove common/utils.py (still WIP). --- common/utils.py | 180 ------------------------------------------------ 1 file changed, 180 deletions(-) delete mode 100644 common/utils.py diff --git a/common/utils.py b/common/utils.py deleted file mode 100644 index 9c942b6..0000000 --- a/common/utils.py +++ /dev/null @@ -1,180 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import annotations -import contextlib -import dataclasses -import os -import subprocess -import tempfile -import typing - -import defs - - -BATCH_DIRECTORY_REGISTRY = [] -BATCH_FILE_REGISTRY = [] - - -@contextlib.contextmanager -def batch_directory(): - os.makedirs("_tmp", exist_ok=True) - try: - if len(BATCH_DIRECTORY_REGISTRY) > 0: - final_cleanup = False - yield BATCH_DIRECTORY_REGISTRY[-1] - else: - final_cleanup = True - dirname = os.path.abspath(tempfile.mkdtemp(dir="_tmp")) - BATCH_DIRECTORY_REGISTRY.append(dirname) - os.makedirs(dirname, exist_ok=True) - print(f"py-hpc-scripts: create {dirname}") - yield dirname - finally: - if final_cleanup: - BATCH_DIRECTORY_REGISTRY.pop() - - -@contextlib.contextmanager -def batch_file(prefix: typing.Optional[str] = None): - if len(BATCH_DIRECTORY_REGISTRY) > 0: - fname = os.path.abspath(os.path.join(BATCH_DIRECTORY_REGISTRY[-1], prefix + ".sh")) - else: - # os.makedirs("_tmp", exist_ok=True) - # fname = os.path.abspath(tempfile.mktemp(prefix=prefix + "_", suffix=".sh", dir="_tmp")) - fname = os.path.abspath(prefix + ".sh") - - try: - with open(fname, "w") as f: - BATCH_FILE_REGISTRY.append(f) - f.write("#!/bin/bash -l\n\n") - yield f, fname - finally: - print(f"py-hpc-scripts: write {fname}") - BATCH_FILE_REGISTRY.pop() - - -def run(*args: str) -> None: - split_args = [item for arg in args for item in arg.split(" ")] - command = " ".join(split_args) - if len(BATCH_FILE_REGISTRY) > 0: - BATCH_FILE_REGISTRY[-1].write(command + "\n") - else: - subprocess.run(command, capture_output=False, shell=True) - - -def module_purge(force: bool = False) -> None: - run(f"module{' --force ' if force else ' '}purge") - - -def module_load(*module_names: str) -> None: - for module_name in module_names: - run(f"module load {module_name}") - - -class InvalidArgumentError(Exception): - def __init__(self, parameter: str, token: str, options: list[str]): - options = [f"`{opt}`" for opt in options] - msg = ( - f"Invalid value `{token}` for parameter `{parameter}`. " - f"Available options: {', '.join(options)}." - ) - super().__init__(msg) - - -@contextlib.contextmanager -def check_argument(parameter, token, options): - if token not in options: - raise InvalidArgumentError(parameter, token, options) - try: - yield token - finally: - pass - - -def load_env(env: str) -> None: - with check_argument("env", env, defs.valid_programming_environments): - module_load(f"prgenv/{env}") - - -def load_compiler(env: str, compiler_version: str) -> str: - with check_argument("env", env, defs.valid_programming_environments): - if env == "gnu": - module_load(f"gcc/{compiler_version}") - cc, cxx, fc = "gcc", "g++", "gfortran" - env_id = f"gnu-{compiler_version}" - export_variable( - "LD_LIBRARY_PATH", - f"/usr/local/apps/gcc/{compiler_version}/lib64", - prepend_value=True, - ) - else: - module_load(f"intel/{compiler_version}") - cc, cxx, fc = "icc", "icpc", "ifort" - env_id = f"intel-{compiler_version}" - export_variable("CC", cc) - export_variable("CXX", cxx) - export_variable("FC", fc) - return env_id - - -def load_mpi(mpi: str, env: str, compiler_version: str, partition: str) -> str: - with check_argument("mpi", mpi, defs.valid_mpi_libraries): - with check_argument("env", env, defs.valid_programming_environments): - with check_argument("partition", partition, defs.valid_partitions): - cc, cxx, fc = "mpicc", "mpicxx", "mpifort" - if mpi == "hpcx": - if env == "gnu" and compiler_version == "13.2.0": - module_name = "hpcx-openmpi/2.17.1" - else: - module_name = "hpcx-openmpi/2.10.0" - elif mpi == "intel-mpi": - module_name = "intel-mpi/2023.2.0" - if env == "intel": - cc, cxx, fc = "mpiicc", "mpiicpc", "mpiifort" - else: - cc, cxx, fc = "mpigcc", "mpigxx", "mpif90" - else: - module_name = "openmpi/4.1.5.4" if partition == "gpu" else "openmpi/4.1.1.1" - module_load(module_name) - export_variable("CC", cc) - export_variable("MPICC", cc) - export_variable("CXX", cxx) - export_variable("MPICXX", cxx) - export_variable("FC", fc) - export_variable("MPIFC", fc) - return module_name.replace("/", "-") - - -def load_gpu_libraries(env: str, compiler_version: str) -> None: - with check_argument("env", env, defs.valid_programming_environments): - if env == "gnu" and compiler_version == "13.2.0": - module_load("nvidia/24.1", "cuda/11.6") - else: - module_load("nvidia/22.11", "cuda/11.6") - - -def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: - cmd = f"export {name}={str(value)}" - if prepend_value: - cmd += f":${name}" - run(cmd) - - -def update_path(value: str): - export_variable("PATH", value, prepend_value=True) - - -@contextlib.contextmanager -def chdir(dirname: str, restore: bool = True) -> None: - try: - run(f"pushd {dirname}") - yield None - finally: - if restore: - run("popd") - - -@dataclasses.dataclass -class ThreadsLayout: - num_nodes: int - num_tasks_per_node: int - num_threads_per_task: int From 3d33d8822f2f8f25c65f232814aaf43a0012493d Mon Sep 17 00:00:00 2001 From: stubbiali Date: Fri, 17 May 2024 08:02:08 +0200 Subject: [PATCH 22/22] Set ghex env variables within load_gpu_libraries. --- hpc2020/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hpc2020/utils.py b/hpc2020/utils.py index 9c942b6..d5b2f11 100644 --- a/hpc2020/utils.py +++ b/hpc2020/utils.py @@ -150,6 +150,9 @@ def load_gpu_libraries(env: str, compiler_version: str) -> None: module_load("nvidia/24.1", "cuda/11.6") else: module_load("nvidia/22.11", "cuda/11.6") + export_variable("GHEX_USE_GPU", 1) + export_variable("GHEX_GPU_TYPE", "NVIDIA") + export_variable("GHEX_GPU_ARCH", 80) def export_variable(name: str, value: typing.Any, prepend_value: bool = False) -> None: