From 88175ccdff09c2778c630647dbedf1fbd7ff8502 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 13 Aug 2025 05:49:17 +0000 Subject: [PATCH 1/3] Fix raster sum performance --- utils/raster_sum.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 9cd5261..40b0592 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -1,6 +1,7 @@ import argparse import os import queue +import resource import sys import tempfile import time @@ -26,8 +27,6 @@ def worker( path: Path = input_queue.get_nowait() except queue.Empty: break - if compress: - print(path) with RasterLayer.layer_from_file(path) as partial_raster: if merged_result is None: @@ -50,16 +49,26 @@ def raster_sum( output_filename: Path, processes_count: int ) -> None: + print(f"process count set to {processes_count}") + + _, max_fd_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (max_fd_limit, max_fd_limit)) + print(f"Set fd limit to {max_fd_limit}") + os.makedirs(output_filename.parent, exist_ok=True) - files = images_dir.glob("*.tif") + files = list(images_dir.glob("*.tif")) if not files: sys.exit(f"No files in {images_dir}, aborting") + print(f"Found {len(files)} images to process") with tempfile.TemporaryDirectory() as tempdir: with Manager() as manager: source_queue = manager.Queue() + for file in files: + source_queue.put(file) + workers = [Process(target=worker, args=( False, f"{index}.tif", @@ -69,11 +78,6 @@ def raster_sum( for worker_process in workers: worker_process.start() - for file in files: - source_queue.put(file) - # for _ in range(len(workers)): - # source_queue.put(None) - processes = workers while processes: candidates = [x for x in processes if not x.is_alive()] @@ -86,7 +90,6 @@ def raster_sum( processes.remove(candidate) time.sleep(0.1) - # here we should have now a set of images in tempdir to merge single_worker = Process(target=worker, args=( True, From 5450d0492d777a48c76108c279cb853e02dc7c04 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 13 Aug 2025 05:50:08 +0000 Subject: [PATCH 2/3] Fixes for slurm run. --- .github/workflows/python-package.yml | 1 + scripts/run.sh | 2 +- scripts/slurm.sh | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0d1249f..6034709 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,3 +47,4 @@ jobs: - name: Script checks run: | shellcheck ./scripts/run.sh + shellcheck ./scripts/slurm.sh diff --git a/scripts/run.sh b/scripts/run.sh index 3d6c9b7..dcc75bb 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -193,7 +193,7 @@ do python3 ./utils/raster_sum.py --rasters_directory "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/"${TAXA}"/ --output "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/"${TAXA}".tif done - python3 ./utils/species_totals.py --aohs "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/ --output "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv + python3 ./utils/species_totals.py --deltaps "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/ --output "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv # Generate final map python3 ./deltap/delta_p_scaled.py --input "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/ \ diff --git a/scripts/slurm.sh b/scripts/slurm.sh index 25d167d..1fd08cb 100644 --- a/scripts/slurm.sh +++ b/scripts/slurm.sh @@ -170,7 +170,7 @@ python3 ./utils/speciesgenerator.py --datadir "${DATADIR}" --output "${DATADIR}" python3 ./utils/persistencegenerator.py --datadir "${DATADIR}" --curve "${CURVE}" --output "${DATADIR}"/persistencebatch.csv # Calculate all the AoHs -littlejohn -j ${SLURM_JOB_CPUS_PER_NODE} -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat +littlejohn -j "${SLURM_JOB_CPUS_PER_NODE}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat # Generate validation summaries python3 ./aoh-calculator/validation/collate_data.py --aoh_results "${DATADIR}"/aohs/current/ --output "${DATADIR}"/aohs/current.csv @@ -188,16 +188,16 @@ python3 ./aoh-calculator/summaries/endemism.py --aohs_folder "${DATADIR}"/aohs/c --output "${DATADIR}"/predictors/endemism.tif # Calculate the per species Delta P values -littlejohn -j ${SLURM_JOB_CPUS_PER_NODE} -o "${DATADIR}"/persistencebatch.log -c "${DATADIR}"/persistencebatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./deltap/global_code_residents_pixel.py +littlejohn -j "${SLURM_JOB_CPUS_PER_NODE}" -o "${DATADIR}"/persistencebatch.log -c "${DATADIR}"/persistencebatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./deltap/global_code_residents_pixel.py for SCENARIO in "${SCENARIOS[@]}" do for TAXA in "${TAXAS[@]}" do - python3 ./utils/raster_sum.py --rasters_directory "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/"${TAXA}"/ --output "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/"${TAXA}".tif + python3 ./utils/raster_sum.py --rasters_directory "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/"${TAXA}"/ --output "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/"${TAXA}".tif -j "${SLURM_JOB_CPUS_PER_NODE}" done - python3 ./utils/species_totals.py --aohs "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/ --output "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv + python3 ./utils/species_totals.py --deltaps "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/ --output "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv # Generate final map python3 ./deltap/delta_p_scaled.py --input "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/ \ From 4278b449487f5c0943e08ce97b0427b2cd8801d6 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 13 Aug 2025 05:58:36 +0000 Subject: [PATCH 3/3] Fix linting errors on slurm script --- scripts/slurm.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/slurm.sh b/scripts/slurm.sh index 1fd08cb..2d3e745 100644 --- a/scripts/slurm.sh +++ b/scripts/slurm.sh @@ -9,9 +9,10 @@ set -e -source ${HOME}/venvs/life/bin/activate -cd ${HOME}/dev/life -export PATH=$PATH:$HOME/go/bin +# shellcheck disable=SC1091 +source "${HOME}"/venvs/life/bin/activate +cd "${HOME}"/dev/life +export PATH="${PATH}":"${HOME}"/go/bin if [ -z "${DATADIR}" ]; then echo "Please specify $DATADIR"