From fc2d23c3e3ea615ceaeed266941eb5e9dc6ac366 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 16:18:21 +0100 Subject: [PATCH 1/7] Attempt to prevent habitat processing if you already have the data --- scripts/run.sh | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index ed380f6..cdbf4e7 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -22,27 +22,34 @@ fi declare -a TAXALIST=("AMPHIBIA" "AVES" "MAMMALIA" "REPTILIA") # Get habitat layer and prepare for use -if [ ! -f ${DATADIR}/habitat/raw.tif ]; then - reclaimer zenodo --zenodo_id 3939050 --filename PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif --output ${DATADIR}/habitat/raw.tif +if [ ! -d ${DATADIR}/${DATADIR}/habitat_layers ]; then + if [ ! -f ${DATADIR}/habitat/raw.tif ]; then + reclaimer zenodo --zenodo_id 3939050 --filename PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif --output ${DATADIR}/habitat/raw.tif + fi + + python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/raw.tif \ + --scale 1000.0 \ + --projection "ESRI:54009" \ + --output ${DATADIR}/tmp_habitat_layers/current + mv ${DATADIR}/tmp_habitat_layers ${DATADIR}/habitat_layers fi -python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/raw.tif \ - --scale 1000.0 \ - --projection "ESRI:54009" \ - --output ${DATADIR}/habitat_layers/current - -python3 ./prepare_layers/make_masks.py --habitat_layers ${DATADIR}/habitat_layers/current \ - --output_directory ${DATADIR}/masks +if [! -d ${DATADIR}/masks ]; then + python3 ./prepare_layers/make_masks.py --habitat_layers ${DATADIR}/habitat_layers/current \ + --output_directory ${DATADIR}/masks +fi # Fetch and prepare the elevation layers -if [ ! 
-f ${DATADIR}/elevation/elevation.tif ]; then - reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation/elevation.tif -fi -if [ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]; then - gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-max-1k.tif -fi -if [ ! -f ${DATADIR}/elevation/elevation-min-1k.tif ]; then - gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-min-1k.tif +if [[ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]]; then + if [ ! -f ${DATADIR}/elevation/elevation.tif ]; then + reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation/elevation.tif + fi + if [ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]; then + gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-max-1k.tif + fi + if [ ! 
-f ${DATADIR}/elevation/elevation-min-1k.tif ]; then + gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-min-1k.tif + fi fi # Generate the crosswalk table From dacd8331add75a500530fd2be2ade4f9a021d52e Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 16:18:45 +0100 Subject: [PATCH 2/7] Terminology fixes --- prepare_species/extract_species_data_psql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_species/extract_species_data_psql.py b/prepare_species/extract_species_data_psql.py index 8ea70eb..0ceda78 100644 --- a/prepare_species/extract_species_data_psql.py +++ b/prepare_species/extract_species_data_psql.py @@ -509,7 +509,7 @@ def main() -> None: parser.add_argument( '--output', type=str, - help='Directory where per species Geojson is stored', + help='Directory where per species GeoJSON is stored', required=True, dest='output_directory_path', ) From 692dadd7d81b5062ddd3fe8be691042ac899c864 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 16:48:44 +0100 Subject: [PATCH 3/7] Update aoh package --- aoh-calculator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aoh-calculator b/aoh-calculator index d56e12f..1398d5d 160000 --- a/aoh-calculator +++ b/aoh-calculator @@ -1 +1 @@ -Subproject commit d56e12f85255ad5ea14d1105febecd635c2afcee +Subproject commit 1398d5d349bac4b31332aecf17f0b55704628809 From 66dcd1a46bf6c448e32f733019537f1906099b21 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 17:03:12 +0100 Subject: [PATCH 4/7] update aoh calculator --- aoh-calculator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aoh-calculator b/aoh-calculator index 1398d5d..c24def9 160000 --- a/aoh-calculator +++ b/aoh-calculator @@ -1 +1 @@ -Subproject commit 1398d5d349bac4b31332aecf17f0b55704628809 +Subproject commit c24def960799f170a9812af31d4c0e2dc5940dbf From 
d344dd03a395f0a9ab5385665708037093aaa47b Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 17:23:45 +0100 Subject: [PATCH 5/7] Improvements to run.sh --- scripts/run.sh | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index cdbf4e7..24c4b73 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -22,11 +22,13 @@ fi declare -a TAXALIST=("AMPHIBIA" "AVES" "MAMMALIA" "REPTILIA") # Get habitat layer and prepare for use -if [ ! -d ${DATADIR}/${DATADIR}/habitat_layers ]; then +if [ ! -d ${DATADIR}/habitat_layers ]; then if [ ! -f ${DATADIR}/habitat/raw.tif ]; then + echo "Fetching habitat map..." reclaimer zenodo --zenodo_id 3939050 --filename PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif --output ${DATADIR}/habitat/raw.tif fi + echo "Processing habitat map..." python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/raw.tif \ --scale 1000.0 \ --projection "ESRI:54009" \ @@ -34,59 +36,75 @@ if [ ! -d ${DATADIR}/${DATADIR}/habitat_layers ]; then mv ${DATADIR}/tmp_habitat_layers ${DATADIR}/habitat_layers fi -if [! -d ${DATADIR}/masks ]; then +if [ ! -d ${DATADIR}/masks ]; then + echo "Processing masks..." python3 ./prepare_layers/make_masks.py --habitat_layers ${DATADIR}/habitat_layers/current \ --output_directory ${DATADIR}/masks fi # Fetch and prepare the elevation layers -if [[ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]]; then +if [[ ! -f ${DATADIR}/elevation/elevation-max-1k.tif || ! -f ${DATADIR}/elevation/elevation-min-1k.tif ]]; then if [ ! -f ${DATADIR}/elevation/elevation.tif ]; then + echo "Fetching elevation map..." reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation/elevation.tif fi if [ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]; then + echo "Generating elevation max layer..." 
gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-max-1k.tif fi if [ ! -f ${DATADIR}/elevation/elevation-min-1k.tif ]; then + echo "Generating elevation min layer..." gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-min-1k.tif fi fi # Generate the crosswalk table if [ ! -f ${DATADIR}/crosswalk.csv ]; then + echo "Generating crosswalk table..." python3 ./prepare_layers/convert_crosswalk.py --original ${PWD}/data/crosswalk_bin_T.csv --output ${DATADIR}/crosswalk.csv fi # Get species data per taxa from IUCN data for TAXA in "${TAXALIST[@]}" do + echo "Extracting species data for ${TAXA}..." python3 ./prepare_species/extract_species_data_psql.py --class ${TAXA} --output ${DATADIR}/species-info/${TAXA}/ --projection "ESRI:54009" --excludes ${DATADIR}/SpeciesList_generalisedRangePolygons.csv done if [ -f data/BL_Species_Elevations_2023.csv ]; then + echo "Applying birdlife data..." python3 ./prepare_species/apply_birdlife_data.py --geojsons ${DATADIR}/species-info/AVES --overrides data/BL_Species_Elevations_2023.csv fi +echo "Generating AoH task list..." python3 ./utils/aoh_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv +echo "Generating AoHs..." littlejohn -j 200 -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py # Calculate predictors from AoHs +echo "Generating species richness..." python3 ./aoh-calculator/summaries/species_richness.py --aohs_folder ${DATADIR}/aohs/current/ \ --output ${DATADIR}/summaries/species_richness.tif +echo "Generating endemism..." 
python3 ./aoh-calculator/summaries/endemism.py --aohs_folder ${DATADIR}/aohs/current/ \ --species_richness ${DATADIR}/summaries/species_richness.tif \ --output ${DATADIR}/summaries/endemism.tif # Aoh Validation +echo "Collating validation data..." python3 ./aoh-calculator/validation/collate_data.py --aoh_results ${DATADIR}/aohs/current/ \ --output ${DATADIR}/validation/aohs.csv +echo "Calculating model validation..." python3 ./aoh-calculator/validation/validate_map_prevalence.py --collated_aoh_data ${DATADIR}/validation/aohs.csv \ --output ${DATADIR}/validation/model_validation.csv # Threats +echo "Generating threat task list..." python3 ./utils/threats_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/threatbatch.csv +echo "Generating threat rasters..." littlejohn -j 200 -o ${DATADIR}/threatbatch.log -c ${DATADIR}/threatcatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./threats/threat_processing.py +echo "Summarising threats..." python3 ./threats/threat_summation.py --threat_rasters ${DATADIR}/threat_rasters --output ${DATADIR}/threat_results From c77eb62a6f0e2d06bae72edb4653070dda4dd805 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 17:43:55 +0100 Subject: [PATCH 6/7] fix for python path in docker --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ca21abd..1d4f490 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,7 @@ RUN mkdir -p /data # This is because outside of Docker we want to ensure # the Python virtualenv is set, but in Docker we don't # use a virtualenv, as docker *is* a virtualenv -ENV VIRTUAL_ENV=/ +ENV VIRTUAL_ENV=/usr ENV PYTHONPATH=/root/star RUN python3 -m pytest ./tests From 5e66dee9d19669bbc11c295650becc610639f921 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 16 May 2025 17:44:11 +0100 Subject: [PATCH 7/7] Take a guess at how many CPUs to use in run.sh --- scripts/run.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 
deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index 24c4b73..25d5668 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -19,6 +19,10 @@ if [ -z "${VIRTUAL_ENV}" ]; then exit 1 fi +export CPUS=$(getconf _NPROCESSORS_ONLN) +export THREADS=$(( CPUS > 1 ? CPUS / 2 : 1 )) # half the online CPUs, but never fewer than one worker +echo "Using $THREADS threads." + declare -a TAXALIST=("AMPHIBIA" "AVES" "MAMMALIA" "REPTILIA") # Get habitat layer and prepare for use @@ -80,7 +84,7 @@ echo "Generating AoH task list..." python3 ./utils/aoh_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv echo "Generating AoHs..." -littlejohn -j 200 -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py +littlejohn -j ${THREADS} -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py # Calculate predictors from AoHs echo "Generating species richness..." @@ -104,7 +108,7 @@ echo "Generating threat task list..." python3 ./utils/threats_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/threatbatch.csv echo "Generating threat rasters..." -littlejohn -j 200 -o ${DATADIR}/threatbatch.log -c ${DATADIR}/threatcatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./threats/threat_processing.py +littlejohn -j ${THREADS} -o ${DATADIR}/threatbatch.log -c ${DATADIR}/threatbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./threats/threat_processing.py echo "Summarising threats..." python3 ./threats/threat_summation.py --threat_rasters ${DATADIR}/threat_rasters --output ${DATADIR}/threat_results