From 104820b8a5412588d7bde57ef334a7e06bbc5594 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 5 Feb 2026 12:32:53 +0000 Subject: [PATCH 01/27] Add snakemake rules for habitat maps --- config/config.yaml | 28 +++++ workflow/Snakefile | 43 +++++++ workflow/rules/habitat.smk | 236 +++++++++++++++++++++++++++++++++++++ workflow/rules/prepare.smk | 105 +++++++++++++++++ 4 files changed, 412 insertions(+) create mode 100644 config/config.yaml create mode 100644 workflow/Snakefile create mode 100644 workflow/rules/habitat.smk create mode 100644 workflow/rules/prepare.smk diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..99ec52c --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,28 @@ +# LIFE Pipeline Configuration +# =========================== + +# Taxonomic classes to process +taxa: + - AMPHIBIA + - AVES + - MAMMALIA + - REPTILIA + +# Pixel scale +pixel_scale: 0.016666666666667 + +# Hyde projection data +hyde_projection: 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' +hyde_pixel_scale: 0.08333333333333333 + +# Zenodo configuration for downloading raw habitat +zenodo: + jung_habitat: + zenodo_id: 4058819 + filename: "iucn_habitatclassification_composite_lvl2_ver004.zip" + jung_habitat_updates: + zenodo_id: 4058819 + filename: "lvl2_changemasks_ver004.zip" + jung_pnv: + zenodo_id: 4038749 + filename: "pnv_lvl1_004.zip" diff --git a/workflow/Snakefile b/workflow/Snakefile new file mode 100644 index 0000000..5e3e2b6 --- /dev/null +++ b/workflow/Snakefile @@ -0,0 +1,43 @@ +# LIFE Pipeline - Snakemake Workflow +# =================================== +# +# This workflow implements the LIFE biodivesity metric methodology. +# It processes species data, generates Area of Habitat (AoH) rasters, and +# produces the different scenario output rasters +# +# Usage: +# snakemake --cores N all +# snakemake --cores N validation +# snakemake --cores N life +# +# Environment variables required: +# DATADIR - Directory for input/output data +# DB_HOST, DB_NAME, DB_USER, DB_PASSWORD - PostgreSQL credentials +# +# For GBIF validation (optional): +# GBIF_USERNAME, GBIF_EMAIL, GBIF_PASSWORD + +import os +from pathlib import Path + +# Allow GDAL/OGR to handle large GeoJSON objects +os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" + +# Source directory (where this repo lives) +SRCDIR = Path(workflow.basedir).parent + + +# Load configuration +configfile: SRCDIR / "config/config.yaml" + + +# Data directory from environment variable +DATADIR = Path(os.environ.get("DATADIR", "/data")) + +# Taxa list from config +TAXA = config["taxa"] + + +# Include rule modules +include: "rules/prepare.smk" +include: "rules/habitat.smk" diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk new file mode 100644 index 0000000..c57ad79 --- /dev/null +++ b/workflow/rules/habitat.smk @@ -0,0 +1,236 @@ +# LIFE Pipeline - Updated habitat map rules +# ========================================== +# +# These rules handle combining the Jung habitat map with +# farming data from GAEZ and Hyde to generate a more suitable +# base map. + + +# ============================================================================= +# GAEZ data +# ============================================================================= +rule gaez_download: + """ + Fetch the compressed GAEZ data. + """ + output: + archive=DATADIR / "food" / "LR.zip" + params: + url="https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip", + log: + DATADIR / "logs" / "download_gaez.log", + shell: + """ + curl -o {output.archive} \ + {params.url} \ + 2>&1 | tee {log} + """ + + +rule gaez_expand: + """ + Decompress the GAEZ download. + """ + output: + gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif" + params: + filepath="LR/lco/GLCSv11_02_5m.tif" + input: + archive=DATADIR / "food" / "LR.zip" + log: + DATADIR / "logs" / "expand_gaez.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.raster}) \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Hyde data +# ============================================================================= +rule hyde_download: + """ + Fetch the compressed Hyde data. + """ + output: + archive=DATADIR / "food" / "baseline.zip" + params: + url="https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip", + log: + DATADIR / "logs" / "download_hyde.log", + shell: + """ + curl -o {output.archive} \ + {params.url} \ + 2>&1 | tee {log} + """ + + +rule hyde_expand_land_usage_archive: + """ + Extract the inner land usage archive. + """ + output: + inner_hyde=DATADIR / "food" / "2017AD_lu.zip" + params: + filepath="baseline/zip/2017AD_lu.zip" + input: + archive=DATADIR / "food" / "baseline.zip" + log: + DATADIR / "logs" / "expand_hyde_1.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.inner_hyde}) \ + 2>&1 | tee {log} + """ + + +rule hyde_expand_land_usage_raster: + """ + Extract raster from the inner land usage archive. + """ + output: + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + params: + filepath="grazing2017AD.asc" + input: + archive=DATADIR / "food" / "2017AD_lu.zip" + log: + DATADIR / "logs" / "expand_hyde_2.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.raw_hyde_raster}) \ + 2>&1 | tee {log} + """ + + +rule modify_hyde_pixel_scale: + """ + Modify the pixel scale value in the Hyde data as it isn't precise enough to match the GAEZ data. + """ + output: + modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc" + input: + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + shell: + """ + sed "s/0.0833333/0.08333333333333333/" {input.raw_hyde_raster} > {output.modified_hyde_raster} + """ + + +rule add_hyde_projection_info: + """ + The Hyde data ships without a projection specified, so we need to add one so that + the rest of the workflow doesn't complain when we try to mix it in with other projected + raster data. + """ + output: + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj" + shell: + """ + echo {config["hyde_projection"]} > {output.hyde_projection_file} + """ + + +# ============================================================================= +# Combine GAEZ/Hyde data +# ============================================================================= +rule combine_gaez_hyde: + """ + Combine the GAEZ and Hyde data, adjusting for overflow in cells. + """ + input: + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", + hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", + gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif", + output: + crop=DATADIR / "food" / "crop.tif", + pasture=DATADIR / "food" / "pasture.tif" + params: + output_dir=DATADIR / "food" + script: + str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") + + +# ============================================================================= +# Project Jung to Hyde/GAEZ +# ============================================================================= +rule project_jung_for_hyde_gaez: + """ + We need a proportional value per pixel per habitat class version of the Jung + current L1 map at the same scale as the GAEZ and Hyde data so we can then + compare them. + """ + input: + habitat=ancient(DATADIR / "habitat" / "current_raw.tif"), + output: + crop=DATADIR / "food" / "current_layers" / "lcc_1401.tif", + pasture=DATADIR / "food" / "current_layers" / "lcc_1402.tif", + params: + scale=config["hyde_pixel_scale"], + output_dir=DATADIR / "food" / "current_layers", + log: + DATADIR / "logs" / "process_habitat_food.log", + shell: + """ + set -e + aoh-habitat-process --habitat {input.habitat} \ + --scale {params.scale} \ + --projection "{params.projection}" \ + --output {params.output_dir} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Combine Jung, Hyde, and GAEZ +# ============================================================================= +rule diff_crop: + """ + Get the differencen betwee the jung data and the new crop data. + """ + input: + raster_a=DATADIR / "food" / "crop.tif", + raster_b=DATADIR / "food" / "current_layers" / "lcc_1401.tif", + output: + diff=DATADIR / "food" / "crop_diff.tif" + script: + str(SRCDIR / "utils" / "raster_diff.py") + + +rule diff_pasture: + """ + Get the difference between the jung data and the new pasture data. + """ + input: + raster_a=DATADIR / "food" / "pasture.tif", + raster_b=DATADIR / "food" / "current_layers" / "lcc_1402.tif", + output: + diff=DATADIR / "food" / "pasture_diff.tif" + script: + str(SRCDIR / "utils" / "raster_diff.py") + + +rule build_food_map: + """ + Pull together all the pieces into a new L1 habitat map at the original Jung scale. + """ + input: + crop_diff=DATADIR / "food" / "crop_diff.tif", + pasture_diff=DATADIR / "food" / "pasture_diff.tif", + pnv=DATADIR / "habitat" / "pnv_raw.tif", + jung=DATADIR / "habitat" / "current_raw.tif", + output: + raster=DATADIR / "food" / "current_raw.tif", + params: + seed=42 + threads: workflow.cores + script: + str(SRCDIR / "prepare_layers" / "make_food_current_map.py") diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk new file mode 100644 index 0000000..aa64ded --- /dev/null +++ b/workflow/rules/prepare.smk @@ -0,0 +1,105 @@ +# LIFE Pipeline - Prepare base layers +# ===================================== + + +# ============================================================================= +# Crosswalk Table +# ============================================================================= + +rule convert_crosswalk: + """ + Generate a crosswalk between IUCN Habitat classes and Jung raster pixel values. + """ + output: + crosswalk=DATADIR / "crosswalk.csv", + script: + str(SRCDIR / "prepare_layers" / "generate_crosswalk.py") + + +# ============================================================================= +# Jung L2 habitat map +# ============================================================================= + +rule jung_habitat_map: + """ + Fetch the Jung habitat map used as the current map. + """ + output: + habitat=DATADIR / "habitat" / "jung_l2_raw.tif" + params: + zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], + filename=config["zenodo"]["jung_habitat"]["filename"], + log: + DATADIR / "logs" / "download_habitat.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} + """ + +rule jung_habitat_updates: + """ + Fetch the Jung habitat map updates. + """ + output: + sentinel=DATADIR / "habitat" / ".downloaded_updates" + params: + habitat_dir=DATADIR / "habitat", + zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], + filename=config["zenodo"]["jung_habitat"]["filename"], + log: + DATADIR / "logs" / "download_habitat_update.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {params.habitat_dir} \ + 2>&1 | tee {log} + + touch {output.sentinel} + """ + +rule current_raw: + """ + Build the LIFE current map, which is Jung with updates applied + and restricted to L1 to match the PNV map restrictions. + """ + input: + updates_sentinel=DATADIR / "habitat" / ".downloaded_updates", + habitat=DATADIR / "habitat" / "jung_l2_raw.tif", + crosswalk=DATADIR / "crosswalk.csv", + params: + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004" + output: + current=DATADIR / "habitat" / "current_raw.tif" + threads: workflow.cores + script: + str(SRCDIR / "prepare_layers" / "make_current_map.py") + +# ============================================================================= +# Jung PNV map +# ============================================================================= + +rule jung_pnv_map: + """ + Fetch the Jung PNV map from zenodo. + """ + output: + habitat=DATADIR / "habitat" / "pnv_raw.tif" + params: + zenodo_id=config["zenodo"]["jung_pnv"]["zenodo_id"], + filename=config["zenodo"]["jung_pnv"]["filename"], + log: + DATADIR / "logs" / "download_pnv.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} + """ From 265dabf166b93fdddf46a923c80d70baa49e0bab Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 5 Feb 2026 12:34:06 +0000 Subject: [PATCH 02/27] Update food map scripts for snakemake and newer yirgacheffe --- prepare_layers/build_gaez_hyde.py | 13 ++-- prepare_layers/generate_crosswalk.py | 13 ++-- prepare_layers/make_current_map.py | 38 ++++++----- prepare_layers/make_food_current_map.py | 91 ++++++++++++++----------- requirements.txt | 7 ++ utils/raster_diff.py | 23 ++++--- 6 files changed, 111 insertions(+), 74 deletions(-) diff --git a/prepare_layers/build_gaez_hyde.py b/prepare_layers/build_gaez_hyde.py index de0e632..4812fb7 100644 --- a/prepare_layers/build_gaez_hyde.py +++ b/prepare_layers/build_gaez_hyde.py @@ -5,7 +5,7 @@ from pathlib import Path import yirgacheffe as yg -import yirgacheffe.operators as yo +from snakemake_argparse_bridge import snakemake_compatible # type: ignore from make_area_map import make_area_map @@ -35,12 +35,12 @@ def build_gaez_hyde( # where gaez and hyde disagree (sum greater than disagg cutoff), scale down uncapped_total = portional_gaez + portional_hyde # NaNs stop warnings about divide by zero - uncapped_total_with_nan = yo.where(uncapped_total == 0.0, float("nan"), uncapped_total) + uncapped_total_with_nan = yg.where(uncapped_total == 0.0, float("nan"), uncapped_total) # calculate ag-perc scalars - total = yo.where( + total = yg.where( uncapped_total_with_nan >= DISAGG_CUTOFF, - DISAGG_CUTOFF - (yg.constant(1) / yo.exp(uncapped_total_with_nan * 2)), + DISAGG_CUTOFF - (yg.constant(1) / yg.exp(uncapped_total_with_nan * 2)), uncapped_total_with_nan, ) @@ -52,6 +52,11 @@ def build_gaez_hyde( hyde_values = total * hyde_ratio hyde_values.to_geotiff(output_dir_path / "pasture.tif") +@snakemake_compatible(mapping={ + "gaez_path": "input.gaez_raster", + "hyde_path": "input.hyde_raster", + "output_dir_path": "params.output_dir", +}) def main() -> None: parser = argparse.ArgumentParser(description="Generate a combined GAEZ and Hyde layers") parser.add_argument( diff --git a/prepare_layers/generate_crosswalk.py b/prepare_layers/generate_crosswalk.py index e9fcfd9..9e909e5 100644 --- a/prepare_layers/generate_crosswalk.py +++ b/prepare_layers/generate_crosswalk.py @@ -1,8 +1,10 @@ import argparse import os +from pathlib import Path import pandas as pd from iucn_modlib.translator import toJung +from snakemake_argparse_bridge import snakemake_compatible # Take from https://www.iucnredlist.org/resources/habitat-classification-scheme @@ -31,11 +33,9 @@ ] def generate_crosswalk( - output_filename: str, + output_filename: Path, ) -> None: - output_dir, _ = os.path.split(output_filename) - if output_dir: - os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_filename.parent, exist_ok=True) res = [] for iucn_code in IUCN_HABITAT_CODES: @@ -48,11 +48,14 @@ def generate_crosswalk( df = pd.DataFrame(res, columns=["code", "value"]) df.to_csv(output_filename, index=False) +@snakemake_compatible(mapping={ + "output_filename": "output.crosswalk", +}) def main() -> None: parser = argparse.ArgumentParser(description="Generate a Jung crosswalk table as CSV.") parser.add_argument( '--output', - type=str, + type=Path, help='Path where final crosswalk should be stored', required=True, dest='output_filename', diff --git a/prepare_layers/make_current_map.py b/prepare_layers/make_current_map.py index 519e0b4..74824ce 100644 --- a/prepare_layers/make_current_map.py +++ b/prepare_layers/make_current_map.py @@ -5,9 +5,9 @@ from typing import Dict, List, Optional import pandas as pd -import yirgacheffe.operators as yo # type: ignore +import yirgacheffe as yg from alive_progress import alive_bar # type: ignore -from yirgacheffe.layers import RasterLayer # type: ignore +from snakemake_argparse_bridge import snakemake_compatible # type: ignore from osgeo import gdal # type: ignore gdal.SetCacheMax(1 * 1024 * 1024 * 1024) @@ -41,37 +41,39 @@ def make_current_map( if update_masks_path is not None: update_masks = [ - RasterLayer.layer_from_file(x) for x in sorted(list(update_masks_path.glob("*.tif"))) + yg.read_raster(x) for x in sorted(list(update_masks_path.glob("*.tif"))) ] else: update_masks = [] - with RasterLayer.layer_from_file(jung_path) as jung: + with yg.read_raster(jung_path) as jung: crosswalk = load_crosswalk_table(crosswalk_path) map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL])) updated_jung = jung for update in update_masks: - updated_jung = yo.where(update != 0, update, updated_jung) + updated_jung = yg.where(update != 0, update, updated_jung) - current_map = yo.where( + current_map = yg.where( updated_jung.isin(map_preserve_code), updated_jung, - (yo.floor(updated_jung / 100) * 100).astype(yo.DataType.UInt16), + (yg.floor(updated_jung / 100) * 100).astype(yg.DataType.UInt16), ) - with RasterLayer.empty_raster_layer_like( - jung, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - current_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - current_map.parallel_save(result, parallelism=concurrency) - + if show_progress: + with alive_bar(manual=True) as bar: + current_map.to_geotiff(output_path, callback=bar, parallelism=concurrency) + else: + current_map.to_geotiff(output_path, parallelism=concurrency) + +@snakemake_compatible(mapping={ + "jung_path": "input.habitat", + "update_masks_path": "params.updates_dir", + "crosswalk_path": "input.crosswalk", + "concurrency": "threads", + "results_path": "output.current", +}) def main() -> None: set_start_method("spawn") diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 0b7e876..74225ef 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -13,6 +13,7 @@ import numpy as np import yirgacheffe as yg from yirgacheffe.layers import RasterLayer +from snakemake_argparse_bridge import snakemake_compatible # type: ignore gdal.SetCacheMax(4 * 1024 * 1024 * 1024) @@ -93,18 +94,20 @@ def process_tile_concurrently( input_queue: Queue, result_queue: Queue, ) -> None: - with yg.read_raster(current_lvl1_path) as current: - with yg.read_raster(pnv_path) as pnv: - while True: - job : tuple[TileInfo, int] | None = input_queue.get() - if job is None: - break - tile, seed = job - if np.isnan(tile.crop_diff) and np.isnan(tile.pasture_diff): - result_queue.put((tile, None)) - else: - data = process_tile(current, pnv, tile, seed) - result_queue.put((tile, data.tobytes())) + with ( + yg.read_raster(current_lvl1_path) as current, + yg.read_raster(pnv_path) as pnv + ): + while True: + job : tuple[TileInfo, int] | None = input_queue.get() + if job is None: + break + tile, seed = job + if np.isnan(tile.crop_diff) and np.isnan(tile.pasture_diff): + result_queue.put((tile, None)) + else: + data = process_tile(current, pnv, tile, seed) + result_queue.put((tile, data.tobytes())) result_queue.put(None) @@ -116,31 +119,33 @@ def build_tile_list( tiles = [] with yg.read_raster(current_lvl1_path) as current: current_dimensions = current.window.xsize, current.window.ysize - with yg.read_raster(crop_adjustment_path) as crop_diff: - with yg.read_raster(pasture_adjustment_path) as pasture_diff: - assert crop_diff.window == pasture_diff.window - diff_dimensions = crop_diff.window.xsize, crop_diff.window.ysize - - x_scale = current_dimensions[0] / diff_dimensions[0] - y_scale = current_dimensions[1] / diff_dimensions[1] - - x_steps = [round(i * x_scale) for i in range(diff_dimensions[0])] - x_steps.append(current_dimensions[0]) - y_steps = [round(i * y_scale) for i in range(diff_dimensions[1])] - y_steps.append(current_dimensions[1]) - - for y in range(crop_diff.window.ysize): - crop_row = crop_diff.read_array(0, y, crop_diff.window.xsize, 1) - pasture_row = pasture_diff.read_array(0, y, pasture_diff.window.xsize, 1) - for x in range(crop_diff.window.xsize): - tiles.append(TileInfo( - x_steps[x], - y_steps[y], - (x_steps[x+1] - x_steps[x]), - (y_steps[y+1] - y_steps[y]), - crop_row[0][x], - pasture_row[0][x], - )) + with ( + yg.read_raster(crop_adjustment_path) as crop_diff, + yg.read_raster(pasture_adjustment_path) as pasture_diff + ): + assert crop_diff.window == pasture_diff.window + diff_dimensions = crop_diff.window.xsize, crop_diff.window.ysize + + x_scale = current_dimensions[0] / diff_dimensions[0] + y_scale = current_dimensions[1] / diff_dimensions[1] + + x_steps = [round(i * x_scale) for i in range(diff_dimensions[0])] + x_steps.append(current_dimensions[0]) + y_steps = [round(i * y_scale) for i in range(diff_dimensions[1])] + y_steps.append(current_dimensions[1]) + + for y in range(crop_diff.window.ysize): + crop_row = crop_diff.read_array(0, y, crop_diff.window.xsize, 1) + pasture_row = pasture_diff.read_array(0, y, pasture_diff.window.xsize, 1) + for x in range(crop_diff.window.xsize): + tiles.append(TileInfo( + x_steps[x], + y_steps[y], + (x_steps[x+1] - x_steps[x]), + (y_steps[y+1] - y_steps[y]), + crop_row[0][x], + pasture_row[0][x], + )) return tiles def assemble_map( @@ -209,7 +214,6 @@ def make_food_current_map( # we need to adjust the ulimit, which is quite low by default _, max_fd_limit = resource.getrlimit(resource.RLIMIT_NOFILE) resource.setrlimit(resource.RLIMIT_NOFILE, (max_fd_limit, max_fd_limit)) - print(f"Set fd limit to {max_fd_limit}") os.makedirs(output_path.parent, exist_ok=True) @@ -256,6 +260,15 @@ def make_food_current_map( processes.remove(candidate) time.sleep(0.1) +@snakemake_compatible(mapping={ + "current_lvl1_path": "input.jung", + "pnv_path": "input.pnv", + "crop_adjustment_path": "input.crop_diff", + "pasture_adjustment_path": "input.pasture_diff", + "seed": "params.seed", + "processes_count": "threads", + "output_path": "output.raster", +}) def main() -> None: parser = argparse.ArgumentParser(description="Build the food current map") parser.add_argument( @@ -304,7 +317,7 @@ def main() -> None: "-j", type=int, required=False, - default=round(cpu_count() / 1), + default=cpu_count() // 2, dest="processes_count", help="Number of concurrent threads to use." ) diff --git a/requirements.txt b/requirements.txt index 0da34ab..6030a2d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,12 @@ gdal[numpy] git+https://github.com/quantifyearth/iucn_modlib.git git+https://github.com/quantifyearth/seasonality + +# Snakemake workflow management +snakemake>=8.0 +# We require a bug fix on this package that isn't yet upstream +snakemake-argparse-bridge @ git+https://github.com/mdales/snakemake-argparse-bridge@77ac272d0bc22c370aeb9d1670882fd9dac9b098 + # developer requirements pytest pylint @@ -28,3 +34,4 @@ pandas-stubs types-requests yirgacheffe[dev] aoh[dev] +snakefmt diff --git a/utils/raster_diff.py b/utils/raster_diff.py index d61055a..6c52f9b 100644 --- a/utils/raster_diff.py +++ b/utils/raster_diff.py @@ -3,6 +3,7 @@ from pathlib import Path import yirgacheffe as yg +from snakemake_argparse_bridge import snakemake_compatible # type: ignore def raster_diff( raster_a_path: Path, @@ -10,12 +11,18 @@ def raster_diff( output_path: Path, ) -> None: os.makedirs(output_path.parent, exist_ok=True) + with ( + yg.read_raster(raster_a_path) as raster_a, + yg.read_raster(raster_b_path) as raster_b + ): + result = raster_a - raster_b + result.to_geotiff(output_path, parallelism=True) - with yg.read_raster(raster_a_path) as raster_a: - with yg.read_raster(raster_b_path) as raster_b: - result = raster_a - raster_b - result.to_geotiff(output_path, parallelism=True) - +@snakemake_compatible(mapping={ + "raster_a_path": "input.raster_a", + "raster_b_path": "input.raster_b", + "output_path": "output.diff", +}) def main() -> None: parser = argparse.ArgumentParser(description="Calculates the difference between two rasters") parser.add_argument( @@ -30,13 +37,13 @@ def main() -> None: type=Path, required=True, dest="raster_b_path", - help="Right hands side of the difference" + help="Right hand side of the difference" ) parser.add_argument( "--output", type=Path, required=True, - dest="output_filename", + dest="output_path", help="Destination geotiff file for results." ) args = parser.parse_args() @@ -44,7 +51,7 @@ def main() -> None: raster_diff( args.raster_a_path, args.raster_b_path, - args.output_filename, + args.output_path, ) if __name__ == "__main__": From aa019407cf7dfe05318a11bfa2848828faab43e7 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 10 Feb 2026 16:04:05 +0000 Subject: [PATCH 03/27] WiP: fractional food map but hitting alignment with diff issues --- prepare_layers/make_arable_map.py | 24 +- prepare_layers/make_current_map.py | 54 +++- prepare_layers/make_food_current_map.py | 298 +++++++++++------ tests/test_food_layer.py | 406 +++++++++++++----------- workflow/rules/habitat.smk | 2 +- workflow/rules/prepare.smk | 7 +- 6 files changed, 486 insertions(+), 305 deletions(-) diff --git a/prepare_layers/make_arable_map.py b/prepare_layers/make_arable_map.py index ab7ca93..aeda289 100644 --- a/prepare_layers/make_arable_map.py +++ b/prepare_layers/make_arable_map.py @@ -2,9 +2,8 @@ from pathlib import Path from typing import Optional -import yirgacheffe.operators as yo +import yirgacheffe as yg from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer from osgeo import gdal gdal.SetCacheMax(1 * 1024 * 1024 * 1024) @@ -18,20 +17,13 @@ def make_arable_map( concurrency: Optional[int], show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: - - arable_map = yo.where(current != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, JUNG_URBAN_CODE) - - with RasterLayer.empty_raster_layer_like( - arable_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - arable_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - arable_map.parallel_save(result, parallelism=concurrency) + with yg.read_raster(current_path) as current: + arable_map = yg.where(current != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, JUNG_URBAN_CODE) + if show_progress: + with alive_bar(manual=True) as bar: + arable_map.to_geotiff(output_path, callback=bar, parallelism=concurrency) + else: + arable_map.to_geotiff(output_path, parallelism=concurrency) def main() -> None: parser = argparse.ArgumentParser(description="Generate the arable scenario map.") diff --git a/prepare_layers/make_current_map.py b/prepare_layers/make_current_map.py index 74824ce..e7ea9bc 100644 --- a/prepare_layers/make_current_map.py +++ b/prepare_layers/make_current_map.py @@ -1,5 +1,6 @@ import argparse import itertools +import os from pathlib import Path from multiprocessing import set_start_method from typing import Dict, List, Optional @@ -29,15 +30,17 @@ def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: result[row.code] = [int(row.value)] return result - -def make_current_map( +def make_current_maps( jung_path: Path, update_masks_path: Optional[Path], crosswalk_path: Path, - output_path: Path, + output_dir_path: Path, concurrency: Optional[int], show_progress: bool, + sentinel_path: Path | None, ) -> None: + os.makedirs(output_dir_path, exist_ok=True) + print(f"Using {concurrency} workers") if update_masks_path is not None: update_masks = [ @@ -61,18 +64,38 @@ def make_current_map( (yg.floor(updated_jung / 100) * 100).astype(yg.DataType.UInt16), ) - if show_progress: + print("Calculating unique land cover types...") + # vals = current_map.unique() + vals = [100, 200, 300, 400, 500, 600, 800] + map_preserve_code + + for lcc in vals: + print(f"Processing {lcc}...") + per_class = current_map == lcc + cast_per_class = per_class.astype(yg.DataType.Float32) + cast_per_class.pretty_print() with alive_bar(manual=True) as bar: - current_map.to_geotiff(output_path, callback=bar, parallelism=concurrency) - else: - current_map.to_geotiff(output_path, parallelism=concurrency) + cast_per_class.to_geotiff( + output_dir_path / f"lcc_{lcc}.tif", + callback=bar, + parallelism=concurrency, + nodata=0.0, + sparse=True, + ) + + # This script generates a bunch of rasters, but snakemake needs one + # output to say when this is done, so if we're in snakemake mode we touch a sentinel file to + # let it know we've done. One day this should be another decorator. + if sentinel_path is not None: + os.makedirs(sentinel_path.parent, exist_ok=True) + sentinel_path.touch() @snakemake_compatible(mapping={ "jung_path": "input.habitat", "update_masks_path": "params.updates_dir", "crosswalk_path": "input.crosswalk", "concurrency": "threads", - "results_path": "output.current", + "output_dir_path": "params.output_dir", + "sentinel_path": "output.sentinel", }) def main() -> None: set_start_method("spawn") @@ -104,7 +127,7 @@ def main() -> None: type=Path, help='Path where final map should be stored', required=True, - dest='results_path', + dest='output_dir_path', ) parser.add_argument( '-j', @@ -122,15 +145,24 @@ def main() -> None: action='store_true', dest='show_progress', ) + parser.add_argument( + '--sentinel', + type=Path, + help='Generate a sentinel file on completion for snakemake to track', + required=False, + default=None, + dest='sentinel_path', + ) args = parser.parse_args() - make_current_map( + make_current_maps( args.jung_path, args.update_masks_path, args.crosswalk_path, - args.results_path, + args.output_dir_path, args.concurrency, args.show_progress, + args.sentinel_path, ) if __name__ == "__main__": diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 74225ef..8de3e56 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -1,5 +1,4 @@ import argparse -import math import os import resource import sys @@ -23,6 +22,9 @@ # Codes not to touch. We're currently working at Level 1 except for artificial which is level 2 PRESERVE_CODES = [600, 700, 900, 1000, 1100, 1200, 1300, 1405] +# PNV codes +# array([ 100, 200, 300, 400, 500, 600, 800, 900, 1100, 1200], dtype=uint16) + class TileInfo(NamedTuple): """Info about a tile to process""" x_position : int @@ -32,61 +34,169 @@ class TileInfo(NamedTuple): crop_diff : float pasture_diff : float +def balance_crop_and_pasture_differences( + crop_diff: float, + pasture_diff: float, + lcc_data_map: dict[int,np.ndarray], +) -> tuple[float,float]: + """ + If we remove one time of agricultural land but expand another, keep them in the same area where possible. + + One thing we know is that reductions are always achievable, as the difference is generated as + (GAEZ and HYDE - Jung), so if we have a negative value the initial state is enough to cover that. + """ + # Either both are a reduction or both an increase, or at least one is null, so no + # balancing required. + if crop_diff * pasture_diff >= 0: + return crop_diff, pasture_diff + + # If balanced they will cancel each other out, otherwise + # we will move the smaller difference from one to the other. + transfer_amount = min(abs(crop_diff), abs(pasture_diff)) + + if crop_diff > 0: + # Crop increasing, pasture decreasing + src_lcc, dst_lcc = PASTURE_CODE, CROP_CODE + else: + # Pasture increasing, crop decreasing + src_lcc, dst_lcc = CROP_CODE, PASTURE_CODE + + src_raster = lcc_data_map[src_lcc] + dst_raster = lcc_data_map[dst_lcc] + + total_cells = src_raster.size + + transfer_mask = src_raster > 0 + src_cells = src_raster.sum() + if src_cells == 0: + if crop_diff > 0: + raise ValueError(f"not cells in pasture {src_raster.sum()}, but pasture diff is -ve {pasture_diff}") + else: + raise ValueError(f"not cells in crop {src_raster.sum()}, but crop diff is -ve {crop_diff}") + + src_coverage = src_raster.sum() / total_cells + + # Per-cell reduction factor: what fraction of each cell's current value to move + # This is safe because our simplifying assumption guarantees transfer <= source_coverage + per_cell_factor = transfer_amount / src_coverage + + transferred = transfer_mask * per_cell_factor + src_raster -= transferred + dst_raster += transferred + + new_crop_diff = crop_diff + transfer_amount * np.sign(pasture_diff) + new_pasture_diff = pasture_diff + transfer_amount * np.sign(crop_diff) + return new_crop_diff, new_pasture_diff + +def remove_land_cover( + lcc_code: int, + diff: float, + pnv: np.ndarray, + lcc_data_map: dict[int,np.ndarray], +) -> None: + assert diff <= 0 + diff = abs(diff) + + agri_raster = lcc_data_map[lcc_code] + removal_mask = agri_raster > 0 + + current_coverage = agri_raster.sum() / agri_raster.size + if current_coverage == 0: + return + + per_cell_fraction = min(diff / current_coverage, 1.0) + + removed_grid = agri_raster * (removal_mask * per_cell_fraction) + agri_raster -= removed_grid + + # Reallocate to PNV classes - note we assume this does not include the agricultural classes + # so as to not undo what we just did! + for lcc, lcc_data in lcc_data_map.items(): + pnv_match = (pnv == lcc) & removal_mask + lcc_data[pnv_match] += removed_grid[pnv_match] + +def add_land_cover( + lcc_code: int, + diff: float, + lcc_data_map: dict[int,np.ndarray], +) -> None: + assert diff >= 0 + + agri_raster = lcc_data_map[lcc_code] + + # Find areas we can put the new data. This is anywhere we don't already + # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.) + # We know that there should be no partial cells involving crop/pasture at this stage + # because of the balancing we did initially. + excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES + eligible_mask = np.ones_like(agri_raster, dtype=bool) + for excluded_code in excluded_codes: + if excluded_code in lcc_data_map: + eligible_mask &= (lcc_data_map[excluded_code] == 0) + + eligible_count = eligible_mask.sum() + if eligible_count == 0: + return + + # Calculate capacity + total_cells = eligible_mask.size + eligible_fraction = eligible_count / total_cells + + if eligible_fraction == 0: + return + + per_cell_addition = diff / eligible_fraction + per_cell_addition = min(per_cell_addition, 1.0) + + agri_raster[eligible_mask] += per_cell_addition + for lcc, lcc_data in lcc_data_map.items(): + if lcc == lcc_code: + continue + lcc_data[eligible_mask & (lcc_data > 0)] -= per_cell_addition + def process_tile( - current: yg.layers.RasterLayer, + current_maps: dict[int,yg.layers.RasterLayer], pnv: yg.layers.RasterLayer, tile: TileInfo, - random_seed: int, ) -> np.ndarray: - rng = np.random.default_rng(random_seed) + for current in current_maps.values(): + assert current.map_projection == pnv.map_projection + assert current.area == pnv.area - data = current.read_array(tile.x_position, tile.y_position, tile.width, tile.height) + lcc_data_map = { + lcc: raster.nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + for lcc, raster in current_maps.items() + } + pnv_data = None + crop_diff, pasture_diff = balance_crop_and_pasture_differences( + tile.crop_diff, + tile.pasture_diff, + lcc_data_map, + ) + + # Order the changes by removals first then additions. In random sampling this was + # important, but not so with a fractional approach. However, we need some consistent + # ordering so we leave this in for consistency. diffs = [ - (tile.crop_diff, CROP_CODE), - (tile.pasture_diff, PASTURE_CODE), + (crop_diff, CROP_CODE), + (pasture_diff, PASTURE_CODE), ] diffs.sort(key=lambda a: a[0]) - # Ordered so removes will come first for diff_value, habitat_code in diffs: - if np.isnan(diff_value): - continue - required_points = math.floor(data.shape[0] * data.shape[1] * math.fabs(diff_value)) - if required_points == 0: - continue - - if diff_value > 0: - valid_mask = ~np.isin(data, [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES) - else: - valid_mask = data == habitat_code - - valid_locations = valid_mask.nonzero() - possible_points = len(valid_locations[0]) - if possible_points == 0: + if diff_value == 0 or np.isnan(diff_value): continue - required_points = min(required_points, possible_points) - selected_locations = rng.choice( - len(valid_locations[0]), - size=required_points, - replace=False - ) - rows = valid_locations[0][selected_locations] - cols = valid_locations[1][selected_locations] - if diff_value > 0: - data[rows, cols] = habitat_code + if diff_value < 0: + if pnv is None: + pnv = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) + remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map) else: - for y, x in zip(rows, cols): - absolute_x = x + tile.x_position - absolute_y = y + tile.y_position - lat, lng = current.latlng_for_pixel(absolute_x, absolute_y) - pnv_x, pnv_y = pnv.pixel_for_latlng(lat, lng) - val = pnv.read_array(pnv_x, pnv_y, 1, 1)[0][0] - data[y][x] = val + add_land_cover(habitat_code, diff_value, lcc_data_map) - return data + return lcc_data_map def process_tile_concurrently( current_lvl1_path: Path, @@ -94,20 +204,21 @@ def process_tile_concurrently( input_queue: Queue, result_queue: Queue, ) -> None: - with ( - yg.read_raster(current_lvl1_path) as current, - yg.read_raster(pnv_path) as pnv - ): + current_maps = { + int(filename.stem.split('_')[1]): yg.read_raster(filename) for filename in current_lvl1_path.glob("lcc_*.tif") + } + reference_layer = next(iter(current_maps.values())) + with yg.read_raster_like(pnv_path, reference_layer, yg.ResamplingMethod.Nearest) as pnv: + pnv.set_window_for_intersection(reference_layer.area) while True: - job : tuple[TileInfo, int] | None = input_queue.get() - if job is None: + tile : TileInfo | None = input_queue.get() + if tile is None: break - tile, seed = job - if np.isnan(tile.crop_diff) and np.isnan(tile.pasture_diff): + if (np.isnan(tile.crop_diff) or tile.crop_diff == 0) and (np.isnan(tile.pasture_diff) or tile.pasture_diff == 0): result_queue.put((tile, None)) else: - data = process_tile(current, pnv, tile, seed) - result_queue.put((tile, data.tobytes())) + res = process_tile(current_maps, pnv, tile) + result_queue.put((tile, {lcc: data.tobytes() for lcc, data in res.items() })) result_queue.put(None) @@ -117,7 +228,8 @@ def build_tile_list( pasture_adjustment_path: Path, ) -> list[TileInfo]: tiles = [] - with yg.read_raster(current_lvl1_path) as current: + + with yg.read_rasters(current_lvl1_path.glob("*.tif")) as current: current_dimensions = current.window.xsize, current.window.ysize with ( yg.read_raster(crop_adjustment_path) as crop_diff, @@ -154,29 +266,47 @@ def assemble_map( result_queue: Queue, sentinal_count: int, ) -> None: + os.makedirs(output_path, exist_ok=True) + + original_filenames = list(current_lvl1_path.glob("lcc_*.tif")) + current_maps = { + int(filename.stem.split('_')[1]): yg.read_raster(filename).nan_to_num() for filename in original_filenames + } + new_maps = {} + with yg.read_raster(original_filenames[0]) as example: + for filename in original_filenames: + lcc = int(filename.stem.split('_')[1]) + outputname = output_path / f"lcc_{lcc}.tif" + new_maps[lcc] = RasterLayer.empty_raster_layer_like( + example, + filename=outputname, + nodata=0.0, + threads=16, + sparse=True, + ) + dtype = example.read_array(0, 0, 1, 1).dtype + + count = 0 + while True: + result : tuple[TileInfo,bytearray | None] | None = result_queue.get() + if result is None: + sentinal_count -= 1 + if sentinal_count == 0: + break + continue - with yg.read_raster(current_lvl1_path) as current: - dtype = current.read_array(0, 0, 1, 1).dtype - with RasterLayer.empty_raster_layer_like(current, filename=output_path) as output: - - # A cheat as we don't have a neat API for this on yirgacheffe yet - band = output._dataset.GetRasterBand(1) # pylint: disable=W0212 - - while True: - result : tuple[TileInfo,bytearray | None] | None = result_queue.get() - if result is None: - sentinal_count -= 1 - if sentinal_count == 0: - break - continue - - tile, rawdata = result - if rawdata is None: - data = current.read_array(tile.x_position, tile.y_position, tile.width, tile.height) - else: - n = np.frombuffer(rawdata, dtype=dtype) - data = np.reshape(n, (tile.height, tile.width)) - + count += 1 + tile, rawdata = result + if rawdata is None: + for lcc in current_maps.keys(): + data = current_maps[lcc].read_array(tile.x_position, tile.y_position, tile.width, tile.height) + band = new_maps[lcc]._dataset.GetRasterBand(1) + band.WriteArray(data, tile.x_position, tile.y_position) + else: + for lcc in current_maps.keys(): + n = np.frombuffer(rawdata[lcc], dtype=dtype) + data = np.reshape(n, (tile.height, tile.width)) + band = new_maps[lcc]._dataset.GetRasterBand(1) band.WriteArray(data, tile.x_position, tile.y_position) @@ -186,18 +316,15 @@ def pipeline_source( pasture_adjustment_path: Path, source_queue: Queue, sentinal_count: int, - random_seed: int, ) -> None: - rng = np.random.default_rng(random_seed) - tiles = build_tile_list( current_lvl1_path, crop_adjustment_path, pasture_adjustment_path, ) - seeds = rng.integers(2**63, size=len(tiles)) - for tile, seed in zip(tiles, seeds): - source_queue.put((tile, seed)) + print(f"There are {len(tiles)} tiles") + for tile in tiles: + source_queue.put(tile) for _ in range(sentinal_count): source_queue.put(None) @@ -206,7 +333,6 @@ def make_food_current_map( pnv_path: Path, crop_adjustment_path: Path, pasture_adjustment_path: Path, - random_seed: int, output_path: Path, processes_count: int, ) -> None: @@ -236,7 +362,6 @@ def make_food_current_map( pasture_adjustment_path, source_queue, processes_count, - random_seed, )) source_worker.start() @@ -265,7 +390,6 @@ def make_food_current_map( "pnv_path": "input.pnv", "crop_adjustment_path": "input.crop_diff", "pasture_adjustment_path": "input.pasture_diff", - "seed": "params.seed", "processes_count": "threads", "output_path": "output.raster", }) @@ -275,7 +399,7 @@ def main() -> None: "--current_lvl1", type=Path, required=True, - help="Path of lvl1 current map", + help="Path of lvl1 current maps", dest="current_lvl1_path", ) parser.add_argument( @@ -299,13 +423,6 @@ def main() -> None: help="Path of adjustment for pasture diff", dest="pasture_adjustment_path", ) - parser.add_argument( - "--seed", - type=int, - required=True, - help="Seed the random number generator", - dest="seed", - ) parser.add_argument( '--output', type=Path, @@ -328,7 +445,6 @@ def main() -> None: args.pnv_path, args.crop_adjustment_path, args.pasture_adjustment_path, - args.seed, args.output_path, args.processes_count, ) diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 43edba9..07a871a 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -6,191 +6,231 @@ from yirgacheffe.operators import DataType from yirgacheffe.window import Area, PixelScale -from prepare_layers.make_food_current_map import TileInfo, process_tile - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected", [ - (42, float("nan"), float("nan"), 42), - - # Other habitat replacement - (42, 1.0, float("nan"), 1401), - (42, float("nan"), 1.0, 1402), - (42, 0.0, float("nan"), 42), - (42, float("nan"), 0.0, 42), - (42, -1.0, float("nan"), 42), - (42, float("nan"), -1.0, 42), - - # Crop replacement - (1401, 1.0, float("nan"), 1401), - (1401, float("nan"), 1.0, 1401), - (1401, 0.0, float("nan"), 1401), - (1401, float("nan"), 0.0, 1401), - (1401, -1.0, float("nan"), 31), - (1401, -1.0, 1.0, 1402), - (1401, float("nan"), -1.0, 1401), - - # Pasture replacement - (1402, 1.0, float("nan"), 1402), - (1402, 1.0, float("nan"), 1402), - (1402, float("nan"), 0.0, 1402), - (1402, 0.0, float("nan"), 1402), - (1402, float("nan"), -1.0, 31), - (1402, 1.0, -1.0, 1401), - (1402, -1.0, float("nan"), 1402), - - # Don't touch urban - (1405, 1.0, float("nan"), 1405), - (1405, float("nan"), 1.0, 1405), - (1405, 0.0, float("nan"), 1405), - (1405, float("nan"), 0.0, 1405), - (1405, -1.0, float("nan"), 1405), - (1405, -1.0, 1.0, 1405), - (1405, float("nan"), -1.0, 1405), - +from prepare_layers.make_food_current_map import TileInfo, process_tile, balance_crop_and_pasture_differences, \ + CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover + +@pytest.mark.parametrize("initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff", [ + (0.0, 0.0, 0.0, 0.0), + (0.8, 0.0, 0.8, 0.0), + (-0.8, 0.0, -0.8, 0.0), + (0.0, 0.8, 0.0, 0.8), + (0.0, -0.8, 0.0, -0.8), + (0.4, 0.2, 0.4, 0.2), + (-0.4, -0.2, -0.4, -0.2), +]) +def test_balance_not_needed( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, +) -> None: + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + dict(), + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + + +def test_one_sided_balance() -> None: + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.zeros((10, 10)), + } + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + 0.5, + -0.1, + lcc_data_map, + ) + # assert expected_crop_diff == result_crop_diff + # assert expected_pasture_diff == result_pasture_diff + # assert (lcc_data_map[CROP_CODE] == crop_cell_value).all() + # assert (lcc_data_map[PASTURE_CODE] == pasture_cell_value).all() + + +@pytest.mark.parametrize( + "initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff,crop_cell_value,pasture_cell_value", + [ + (0.25, -0.5, 0.0, -0.25, 0.25, 0.75), + ] +) +def test_simple_balance_transfer( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, + crop_cell_value: float, + pasture_cell_value: float, +) -> None: + # 0% crop, 100% pasture + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.ones((10, 10)), + } + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + lcc_data_map, + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + assert (lcc_data_map[CROP_CODE] == crop_cell_value).all() + assert (lcc_data_map[PASTURE_CODE] == pasture_cell_value).all() + + +@pytest.mark.parametrize( + "initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff,crop_cell_value,pasture_cell_value", + [ + (0.25, -0.5, 0.0, -0.25, 0.5, 0.5), + (0.5, -0.25, 0.25, 0.0, 0.5, 0.5), + ] +) +def test_simple_half_balance_transfer( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, + crop_cell_value: float, + pasture_cell_value: float, +) -> None: + # 0% crop, 50% pasture + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + } + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + lcc_data_map, + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + + expected_crop_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * crop_cell_value + expected_pasture_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * pasture_cell_value + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() + + +@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ + (-0.5, 0.5, 0.5), +]) +def test_remove_land_simple( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, +) -> None: + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.zeros((10, 10)), + CROP_CODE: np.ones((10, 10)), + } + pnv_data = np.full((10, 10), 1) + + remove_land_cover( + CROP_CODE, + crop_diff, + pnv_data, + lcc_data_map, + ) + + expected_crop_map = np.full((10, 10), expected_crop_cell) + expected_other_map = np.full((10, 10), expected_other_cell) + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() + + +@pytest.mark.parametrize("crop_diff,pnv_value,expected_crop_cell,expected_other_1_cell,expected_other_2_cell", [ + (-0.5, 1, 0.0, 1.0, 0.0), + (-0.75, 1, 0.0, 1.0, 0.0), # too much + (-0.25, 1, 0.5, 0.5, 0.0), + (-0.5, 2, 0.0, 0.0, 1.0), + (-0.25, 2, 0.5, 0.0, 0.5), ]) -def test_process_tile_all(initial, crop_diff, pasture_diff, expected) -> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as current: - yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=12, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - expected = np.full((12, 10), expected, dtype=np.int16) - assert (result == expected).all() - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 - (42, float("nan"), float("nan"), 0, 0, 0), - (42, 0.5, float("nan"), 50, 0, 0), - (42, float("nan"), 0.5, 0, 50, 0), - (42, -0.5, float("nan"), 0, 0, 0), - (42, float("nan"), -0.5, 0, 0, 0), - - (1401, float("nan"), float("nan"), 100, 0, 0), - (1401, -0.5, float("nan"), 50, 0, 50), - (1401, float("nan"), -0.5, 100, 0, 0), - - (1402, float("nan"), float("nan"), 0, 100, 0), - (1402, float("nan"), -0.5, 0, 50, 50), - (1402, -0.5, float("nan"), 0, 100, 0), +def test_remove_land_less_simple( + crop_diff: float, + pnv_value: int, + expected_crop_cell: float, + expected_other_1_cell: float, + expected_other_2_cell: float, +) -> None: + # 50% crop, 50% other 2, 0% other 1 + lcc_data_map = { + 1: np.zeros((10, 10)), + 2: np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float), + CROP_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + } + pnv_data = np.full((10, 10), pnv_value) + + remove_land_cover( + CROP_CODE, + crop_diff, + pnv_data, + lcc_data_map, + ) + + expected_crop_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_crop_cell + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + expected_other_1_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_other_1_cell + assert (expected_other_1_map == lcc_data_map[1]).all() + expected_other_2_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) + \ + (np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_other_2_cell) + assert (expected_other_2_map == lcc_data_map[2]).all() + +@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ + (0.5, 0.5, 0.5), + (1.0, 1.0, 0.0), ]) -def test_partial_replacement( - initial : int, - crop_diff : float, - pasture_diff : float, - expected_crop_count : int, - expected_pasture_count : int, - expected_pnv_count : int, +def test_add_land_simple( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, ) -> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as current: - yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=10, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - crop_count = (result == 1401).sum() - assert crop_count == expected_crop_count - pasture_count = (result == 1402).sum() - assert pasture_count == expected_pasture_count - pnv_count = (result == 31).sum() - assert pnv_count == expected_pnv_count - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 - (42, float("nan"), float("nan"), 0, 0, 0), - (42, 0.5, float("nan"), 50, 0, 0), - (42, float("nan"), 0.5, 0, 50, 0), - (42, -0.5, float("nan"), 0, 0, 0), - (42, float("nan"), -0.5, 0, 0, 0), - - (1401, float("nan"), float("nan"), 50, 0, 0), - (1401, 1.0, float("nan"), 100, 0, 0), - (1401, 0.5, float("nan"), 100, 0, 0), - (1401, 0.1, float("nan"), 60, 0, 0), - (1401, -0.1, float("nan"), 40, 0, 10), - (1401, -0.5, float("nan"), 0, 0, 50), - (1401, -1.0, float("nan"), 0, 0, 50), - (1401, float("nan"), 1.0, 50, 50, 0), - - (1405, float("nan"), float("nan"), 0, 0, 0), - (1405, 1.0, float("nan"), 50, 0, 0), - (1405, 0.5, float("nan"), 50, 0, 0), - (1405, 0.1, float("nan"), 10, 0, 0), - (1405, -0.1, float("nan"), 0, 0, 0), - (1405, -0.5, float("nan"), 0, 0, 0), - (1405, -1.0, float("nan"), 0, 0, 0), + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.ones((10, 10)), + CROP_CODE: np.zeros((10, 10)), + } + pnv_data = np.full((10, 10), 1) + + add_land_cover( + CROP_CODE, + crop_diff, + lcc_data_map, + ) + + expected_crop_map = np.full((10, 10), expected_crop_cell) + expected_other_map = np.full((10, 10), expected_other_cell) + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() + +@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ + (0.25, 0.5, 0.5), + (0.5, 1.0, 0.0), ]) -def test_partial_initial_tile( - initial : int, - crop_diff : float, - pasture_diff : float, - expected_crop_count : int, - expected_pasture_count : int, - expected_pnv_count : int, +def test_add_land_avoid_excluded( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, ) -> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as current: - - # Cheating as Yirgacheffe doesn't have a neat way to provide numpy data straight to a layer - band = current._dataset.GetRasterBand(1) - vals = np.array([[initial, 22], [22, initial]]) - all_vals = np.tile(vals, (90, 180)) - band.WriteArray(all_vals, 0, 0) - - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=10, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - crop_count = (result == 1401).sum() - assert crop_count == expected_crop_count - pasture_count = (result == 1402).sum() - assert pasture_count == expected_pasture_count - pnv_count = (result == 31).sum() - assert pnv_count == expected_pnv_count + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float), + PASTURE_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + CROP_CODE: np.zeros((10, 10)), + } + pnv_data = np.full((10, 10), 1) + + add_land_cover( + CROP_CODE, + crop_diff, + lcc_data_map, + ) + + expected_crop_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) * expected_crop_cell + expected_pasture_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) # unchanged + expected_other_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) * expected_other_cell + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index c57ad79..6ef1031 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -194,7 +194,7 @@ rule project_jung_for_hyde_gaez: # ============================================================================= rule diff_crop: """ - Get the differencen betwee the jung data and the new crop data. + Get the difference between the jung data and the new crop data. """ input: raster_a=DATADIR / "food" / "crop.tif", diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index aa64ded..e389d27 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -63,7 +63,7 @@ rule jung_habitat_updates: touch {output.sentinel} """ -rule current_raw: +rule current_raws: """ Build the LIFE current map, which is Jung with updates applied and restricted to L1 to match the PNV map restrictions. @@ -73,9 +73,10 @@ rule current_raw: habitat=DATADIR / "habitat" / "jung_l2_raw.tif", crosswalk=DATADIR / "crosswalk.csv", params: - updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004" + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", + output_dir=DATADIR / "habitat" / "current_raw", output: - current=DATADIR / "habitat" / "current_raw.tif" + sentinel=DATADIR / "habitat" / "current_raw" / ".current_raw" threads: workflow.cores script: str(SRCDIR / "prepare_layers" / "make_current_map.py") From 8e6758bf55d6052799861dcc93927b4b68e6b1c9 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 10 Feb 2026 16:57:41 +0000 Subject: [PATCH 04/27] WiP: do crop diff on demand --- prepare_layers/make_food_current_map.py | 79 ++++++++++++++++--------- workflow/rules/habitat.smk | 4 +- 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 8de3e56..a96495d 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -31,8 +31,8 @@ class TileInfo(NamedTuple): y_position : int width : int height : int - crop_diff : float - pasture_diff : float + crop_target : float + pasture_target : float def balance_crop_and_pasture_differences( crop_diff: float, @@ -160,19 +160,42 @@ def process_tile( tile: TileInfo, ) -> np.ndarray: + if np.isnan(tile.crop_target) and np.isnan(tile.pasture_target): + return None + for current in current_maps.values(): assert current.map_projection == pnv.map_projection assert current.area == pnv.area + pnv_data = None + + if not np.isnan(tile.crop_target): + crop_data = current_maps[CROP_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + crop_diff = tile.crop_target - (crop_data.sum() / crop_data.size) + else: + crop_data = None + crop_diff = 0 + if not np.isnan(tile.pasture_target): + pasture_data = current_maps[PASTURE_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + pasture_diff = tile.pasture_target - (pasture_data.sum() / pasture_data.size) + else: + pasture_data = None + pasture_diff = 0 + if (crop_diff == 0) and (pasture_diff == 0): + return None + lcc_data_map = { - lcc: raster.nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) - for lcc, raster in current_maps.items() + CROP_CODE: crop_data if crop_data is not None else current_maps[CROP_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height), + PASTURE_CODE: pasture_data if pasture_data is not None else current_maps[PASTURE_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height), } - pnv_data = None + for lcc in current_maps: + if lcc in [CROP_CODE, PASTURE_CODE]: + continue + lcc_data_map[lcc] = current_maps[lcc].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) crop_diff, pasture_diff = balance_crop_and_pasture_differences( - tile.crop_diff, - tile.pasture_diff, + crop_diff, + pasture_diff, lcc_data_map, ) @@ -214,11 +237,12 @@ def process_tile_concurrently( tile : TileInfo | None = input_queue.get() if tile is None: break - if (np.isnan(tile.crop_diff) or tile.crop_diff == 0) and (np.isnan(tile.pasture_diff) or tile.pasture_diff == 0): - result_queue.put((tile, None)) - else: - res = process_tile(current_maps, pnv, tile) + + res = process_tile(current_maps, pnv, tile) + if res is not None: result_queue.put((tile, {lcc: data.tobytes() for lcc, data in res.items() })) + else: + result_queue.put((tile, None)) result_queue.put(None) @@ -232,24 +256,24 @@ def build_tile_list( with yg.read_rasters(current_lvl1_path.glob("*.tif")) as current: current_dimensions = current.window.xsize, current.window.ysize with ( - yg.read_raster(crop_adjustment_path) as crop_diff, - yg.read_raster(pasture_adjustment_path) as pasture_diff + yg.read_raster(crop_adjustment_path) as crop, + yg.read_raster(pasture_adjustment_path) as pasture, ): - assert crop_diff.window == pasture_diff.window - diff_dimensions = crop_diff.window.xsize, crop_diff.window.ysize + assert crop.window == pasture.window + argi_dimensions = crop.window.xsize, crop.window.ysize - x_scale = current_dimensions[0] / diff_dimensions[0] - y_scale = current_dimensions[1] / diff_dimensions[1] + x_scale = current_dimensions[0] / argi_dimensions[0] + y_scale = current_dimensions[1] / argi_dimensions[1] - x_steps = [round(i * x_scale) for i in range(diff_dimensions[0])] + x_steps = [round(i * x_scale) for i in range(argi_dimensions[0])] x_steps.append(current_dimensions[0]) - y_steps = [round(i * y_scale) for i in range(diff_dimensions[1])] + y_steps = [round(i * y_scale) for i in range(argi_dimensions[1])] y_steps.append(current_dimensions[1]) - for y in range(crop_diff.window.ysize): - crop_row = crop_diff.read_array(0, y, crop_diff.window.xsize, 1) - pasture_row = pasture_diff.read_array(0, y, pasture_diff.window.xsize, 1) - for x in range(crop_diff.window.xsize): + for y in range(crop.window.ysize): + crop_row = crop.read_array(0, y, crop.window.xsize, 1) + pasture_row = pasture.read_array(0, y, pasture.window.xsize, 1) + for x in range(crop.window.xsize): tiles.append(TileInfo( x_steps[x], y_steps[y], @@ -308,6 +332,7 @@ def assemble_map( data = np.reshape(n, (tile.height, tile.width)) band = new_maps[lcc]._dataset.GetRasterBand(1) band.WriteArray(data, tile.x_position, tile.y_position) + print(f"assembled {count} tiles") def pipeline_source( @@ -410,17 +435,17 @@ def main() -> None: dest='pnv_path', ) parser.add_argument( - "--crop_diff", + "--crop", type=Path, required=True, - help="Path of adjustment for crop diff", + help="Path of crop area", dest="crop_adjustment_path", ) parser.add_argument( - "--pasture_diff", + "--pasture", type=Path, required=True, - help="Path of adjustment for pasture diff", + help="Path of pasture area", dest="pasture_adjustment_path", ) parser.add_argument( diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index 6ef1031..f199ed8 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -147,8 +147,8 @@ rule combine_gaez_hyde: Combine the GAEZ and Hyde data, adjusting for overflow in cells. """ input: - hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", - hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", + hyde_projection_file=DATADIR / "food" / "modied_grazing2017AD.prj", + hyde_raster=DATADIR / "food" / "modified_ifgrazing2017AD.asc", gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif", output: crop=DATADIR / "food" / "crop.tif", From 6894dc6dff3536f9af9e9b03f0bfe40c35f88db4 Mon Sep 17 00:00:00 2001 From: mwd24 Date: Wed, 11 Feb 2026 13:57:22 +0000 Subject: [PATCH 05/27] WiP: improvements to food map generation. --- prepare_layers/make_current_map.py | 8 +- prepare_layers/make_food_current_map.py | 124 +++++++++++------------- workflow/rules/habitat.smk | 63 +----------- 3 files changed, 64 insertions(+), 131 deletions(-) diff --git a/prepare_layers/make_current_map.py b/prepare_layers/make_current_map.py index e7ea9bc..517c167 100644 --- a/prepare_layers/make_current_map.py +++ b/prepare_layers/make_current_map.py @@ -61,21 +61,19 @@ def make_current_maps( current_map = yg.where( updated_jung.isin(map_preserve_code), updated_jung, - (yg.floor(updated_jung / 100) * 100).astype(yg.DataType.UInt16), + (yg.floor(updated_jung / 100) * 100), ) print("Calculating unique land cover types...") - # vals = current_map.unique() - vals = [100, 200, 300, 400, 500, 600, 800] + map_preserve_code + vals = current_map.unique() for lcc in vals: print(f"Processing {lcc}...") per_class = current_map == lcc cast_per_class = per_class.astype(yg.DataType.Float32) - cast_per_class.pretty_print() with alive_bar(manual=True) as bar: cast_per_class.to_geotiff( - output_dir_path / f"lcc_{lcc}.tif", + output_dir_path / f"lcc_{int(lcc)}.tif", callback=bar, parallelism=concurrency, nodata=0.0, diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index a96495d..2f51f33 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -1,10 +1,11 @@ import argparse +import multiprocessing import os import resource import sys import time from pathlib import Path -from multiprocessing import Manager, Process, cpu_count +from multiprocessing import Process, cpu_count from queue import Queue from typing import NamedTuple from osgeo import gdal @@ -158,10 +159,15 @@ def process_tile( current_maps: dict[int,yg.layers.RasterLayer], pnv: yg.layers.RasterLayer, tile: TileInfo, -) -> np.ndarray: +) -> dict[int,np.ndarray]: + + lcc_data_map = { + lcc: current_map.nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + for lcc, current_map in current_maps.items() + } if np.isnan(tile.crop_target) and np.isnan(tile.pasture_target): - return None + return lcc_data_map for current in current_maps.values(): assert current.map_projection == pnv.map_projection @@ -170,28 +176,17 @@ def process_tile( pnv_data = None if not np.isnan(tile.crop_target): - crop_data = current_maps[CROP_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + crop_data = lcc_data_map[CROP_CODE] crop_diff = tile.crop_target - (crop_data.sum() / crop_data.size) else: - crop_data = None crop_diff = 0 if not np.isnan(tile.pasture_target): - pasture_data = current_maps[PASTURE_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + pasture_data = lcc_data_map[PASTURE_CODE] pasture_diff = tile.pasture_target - (pasture_data.sum() / pasture_data.size) else: - pasture_data = None pasture_diff = 0 if (crop_diff == 0) and (pasture_diff == 0): - return None - - lcc_data_map = { - CROP_CODE: crop_data if crop_data is not None else current_maps[CROP_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height), - PASTURE_CODE: pasture_data if pasture_data is not None else current_maps[PASTURE_CODE].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height), - } - for lcc in current_maps: - if lcc in [CROP_CODE, PASTURE_CODE]: - continue - lcc_data_map[lcc] = current_maps[lcc].nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + return lcc_data_map crop_diff, pasture_diff = balance_crop_and_pasture_differences( crop_diff, @@ -239,10 +234,7 @@ def process_tile_concurrently( break res = process_tile(current_maps, pnv, tile) - if res is not None: - result_queue.put((tile, {lcc: data.tobytes() for lcc, data in res.items() })) - else: - result_queue.put((tile, None)) + result_queue.put((tile, {lcc: data.tobytes() for lcc, data in res.items() })) result_queue.put(None) @@ -332,7 +324,8 @@ def assemble_map( data = np.reshape(n, (tile.height, tile.width)) band = new_maps[lcc]._dataset.GetRasterBand(1) band.WriteArray(data, tile.x_position, tile.y_position) - print(f"assembled {count} tiles") + if count % 1000 == 0: + print(f"assembled {count} tiles") def pipeline_source( @@ -368,55 +361,54 @@ def make_food_current_map( os.makedirs(output_path.parent, exist_ok=True) - with Manager() as manager: - source_queue = manager.Queue() - result_queue = manager.Queue() - - workers = [Process(target=process_tile_concurrently, args=( - current_lvl1_path, - pnv_path, - source_queue, - result_queue, - )) for _ in range(processes_count)] - for worker_process in workers: - worker_process.start() - - source_worker = Process(target=pipeline_source, args=( - current_lvl1_path, - crop_adjustment_path, - pasture_adjustment_path, - source_queue, - processes_count, - )) - source_worker.start() - - assemble_map( - current_lvl1_path, - output_path, - result_queue, - processes_count, - ) - - processes = workers - processes.append(source_worker) - while processes: - candidates = [x for x in processes if not x.is_alive()] - for candidate in candidates: - candidate.join() - if candidate.exitcode: - for victim in processes: - victim.kill() - sys.exit(candidate.exitcode) - processes.remove(candidate) - time.sleep(0.1) + source_queue = multiprocessing.Queue(maxsize=1000) + result_queue = multiprocessing.Queue(maxsize=1000) + + workers = [Process(target=process_tile_concurrently, args=( + current_lvl1_path, + pnv_path, + source_queue, + result_queue, + )) for _ in range(processes_count)] + for worker_process in workers: + worker_process.start() + + source_worker = Process(target=pipeline_source, args=( + current_lvl1_path, + crop_adjustment_path, + pasture_adjustment_path, + source_queue, + processes_count, + )) + source_worker.start() + + assemble_map( + current_lvl1_path, + output_path, + result_queue, + processes_count, + ) + + processes = workers + processes.append(source_worker) + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(0.1) @snakemake_compatible(mapping={ "current_lvl1_path": "input.jung", "pnv_path": "input.pnv", - "crop_adjustment_path": "input.crop_diff", - "pasture_adjustment_path": "input.pasture_diff", + "crop_adjustment_path": "input.crop", + "pasture_adjustment_path": "input.pasture", "processes_count": "threads", - "output_path": "output.raster", + "output_path": "output.rasters", }) def main() -> None: parser = argparse.ArgumentParser(description="Build the food current map") diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index f199ed8..35d7f49 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -159,78 +159,21 @@ rule combine_gaez_hyde: str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") -# ============================================================================= -# Project Jung to Hyde/GAEZ -# ============================================================================= -rule project_jung_for_hyde_gaez: - """ - We need a proportional value per pixel per habitat class version of the Jung - current L1 map at the same scale as the GAEZ and Hyde data so we can then - compare them. - """ - input: - habitat=ancient(DATADIR / "habitat" / "current_raw.tif"), - output: - crop=DATADIR / "food" / "current_layers" / "lcc_1401.tif", - pasture=DATADIR / "food" / "current_layers" / "lcc_1402.tif", - params: - scale=config["hyde_pixel_scale"], - output_dir=DATADIR / "food" / "current_layers", - log: - DATADIR / "logs" / "process_habitat_food.log", - shell: - """ - set -e - aoh-habitat-process --habitat {input.habitat} \ - --scale {params.scale} \ - --projection "{params.projection}" \ - --output {params.output_dir} \ - 2>&1 | tee {log} - """ - - # ============================================================================= # Combine Jung, Hyde, and GAEZ # ============================================================================= -rule diff_crop: - """ - Get the difference between the jung data and the new crop data. - """ - input: - raster_a=DATADIR / "food" / "crop.tif", - raster_b=DATADIR / "food" / "current_layers" / "lcc_1401.tif", - output: - diff=DATADIR / "food" / "crop_diff.tif" - script: - str(SRCDIR / "utils" / "raster_diff.py") - - -rule diff_pasture: - """ - Get the difference between the jung data and the new pasture data. - """ - input: - raster_a=DATADIR / "food" / "pasture.tif", - raster_b=DATADIR / "food" / "current_layers" / "lcc_1402.tif", - output: - diff=DATADIR / "food" / "pasture_diff.tif" - script: - str(SRCDIR / "utils" / "raster_diff.py") - rule build_food_map: """ Pull together all the pieces into a new L1 habitat map at the original Jung scale. """ input: - crop_diff=DATADIR / "food" / "crop_diff.tif", - pasture_diff=DATADIR / "food" / "pasture_diff.tif", + crop=DATADIR / "food" / "crop.tif", + pasture=DATADIR / "food" / "pasture.tif", pnv=DATADIR / "habitat" / "pnv_raw.tif", jung=DATADIR / "habitat" / "current_raw.tif", output: - raster=DATADIR / "food" / "current_raw.tif", - params: - seed=42 + rasters=DATADIR / "food" / "current_raw", threads: workflow.cores script: str(SRCDIR / "prepare_layers" / "make_food_current_map.py") From cee07a0072501c0e00fdbd4bde93b56e94a56770 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 25 Feb 2026 10:29:17 +0000 Subject: [PATCH 06/27] Updates to get fractional pipeline --- deltap/delta_p_scaled.py | 16 +- deltap/global_code_residents_pixel.py | 313 +++++++++++++------------- local/merge_global_habitat.py | 36 ++- prepare_layers/make_arable_map.py | 28 ++- prepare_layers/make_restore_map.py | 67 +++--- requirements.txt | 2 +- scripts/run.sh | 13 +- tests/test_food_layer.py | 3 - utils/binary_maps.py | 37 ++- utils/habitat_stats.py | 50 ++-- utils/persistencegenerator.py | 10 +- utils/regression_plot.py | 24 +- utils/speciesgenerator.py | 4 +- workflow/Snakefile | 16 ++ 14 files changed, 307 insertions(+), 312 deletions(-) diff --git a/deltap/delta_p_scaled.py b/deltap/delta_p_scaled.py index aed4923..3de0383 100644 --- a/deltap/delta_p_scaled.py +++ b/deltap/delta_p_scaled.py @@ -4,8 +4,7 @@ from pathlib import Path import pandas as pd -import yirgacheffe.operators as yo -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg SCALE = 1e6 @@ -18,19 +17,18 @@ def delta_p_scaled_area( os.makedirs(output_path.parent, exist_ok=True) per_taxa = [ - RasterLayer.layer_from_file(os.path.join(input_path, x)) - for x in sorted(input_path.glob("*.tif")) + yg.read_raster(x) for x in sorted(input_path.glob("*.tif")) ] if not per_taxa: sys.exit(f"Failed to find any per-taxa maps in {input_path}") - area_restore = RasterLayer.layer_from_file(diff_area_map_path) + area_restore = yg.read_raster(diff_area_map_path) total_counts = pd.read_csv(totals_path) - area_restore_filter = yo.where(area_restore < SCALE, float('nan'), area_restore) / SCALE + area_restore_filter = yg.where(area_restore < SCALE, float('nan'), area_restore / SCALE) - with RasterLayer.empty_raster_layer_like( + with yg.layers.RasterLayer.empty_raster_layer_like( area_restore, filename=output_path, nodata=float('nan'), @@ -44,7 +42,7 @@ def delta_p_scaled_area( for layer in per_taxa[1:]: summed_layer = summed_layer + layer - scaled_filtered_layer = yo.where( + scaled_filtered_layer = yg.where( area_restore_filter != 0, ((summed_layer / area_restore_filter) * -1.0) / species_count, float('nan') @@ -57,7 +55,7 @@ def delta_p_scaled_area( taxa = name[:-4] species_count = int(total_counts[total_counts.taxa==taxa]["count"].values[0]) result._dataset.GetRasterBand(idx + 2).SetDescription(taxa) # pylint: disable=W0212 - scaled_filtered_layer = yo.where( + scaled_filtered_layer = yg.where( area_restore_filter != 0, ((inlayer / area_restore_filter) * -1.0) / species_count, float('nan') diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py index 6bb310d..572d34e 100644 --- a/deltap/global_code_residents_pixel.py +++ b/deltap/global_code_residents_pixel.py @@ -1,298 +1,293 @@ import argparse +import json import math import os import shutil import sys from enum import Enum -from tempfile import TemporaryDirectory -from typing import Union +from pathlib import Path -import geopandas as gpd import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer, ConstantLayer +import yirgacheffe as yg + +# This isn't a hard requirement, but in practice most experiments use 0.25, and the original +# paper used the other three values for comparison. Other values are valid, but to save wasted +# times with typos, we do restrict this to the subset used for the paper. +FLOAT_EXPONENTS = {0.1, 0.25, 0.5, 1.0} GOMPERTZ_A = 2.5 GOMPERTZ_B = -14.5 GOMPERTZ_ALPHA = 1 -class Season(Enum): - RESIDENT = 1 - BREEDING = 2 - NONBREEDING = 3 +def open_layer(filename: Path) -> tuple[yg.YirgacheffeLayer,float]: + """We use this helper function for two reasons: + 1. The delta-p values are quite small, and so we want to ensure things are in float64. + 2. We almost always need the total area, but rather than calculate it we can get that + from the JSON file that sits besides the TIFF. + """ + # The "nan" is an artefact of bouncing the data via pandas + if filename.name == "nan": + return yg.constant(0.0), 0.0 -def gen_gompertz(x: float) -> float: - return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + layer = yg.read_raster(filename) -def numpy_gompertz(x: float) -> float: - return np.exp(-np.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + json_filename = filename.parent / f"{filename.stem}.json" + with open(json_filename, "r", encoding="utf-8") as f: + data = json.load(f) + total_aoh = data["aoh_total"] + # total_aoh = layer.astype(yg.DataType.Float64).sum() -def open_layer_as_float64(filename: str) -> Union[ConstantLayer,RasterLayer]: - if filename == "nan": - return ConstantLayer(0.0) - layer = RasterLayer.layer_from_file(filename) - if layer.datatype == gdal.GDT_Float64: - return layer - layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64) - layer.save(layer64) - return layer64 + return layer, total_aoh -def calc_persistence_value(current_aoh: float, historic_aoh: float, exponent_func) -> float: - sp_p = exponent_func(current_aoh / historic_aoh) - sp_p_fix = 1 if sp_p > 1 else sp_p - return sp_p_fix +def calc_persistence_value( + current_aoh: float, + historic_aoh: float, + exponent: str | float +) -> float: + scaled_aoh = current_aoh / historic_aoh + if isinstance(exponent, float): + sp_p = scaled_aoh ** exponent + else: + assert exponent == "gompetz" + sp_p = math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (scaled_aoh ** GOMPERTZ_ALPHA)))) + return 1.0 if sp_p > 1.0 else sp_p def process_delta_p( - current: Union[ConstantLayer,RasterLayer], - scenario: Union[ConstantLayer,RasterLayer], + current: yg.YirgacheffeLayer, + scenario: yg.YirgacheffeLayer, current_aoh: float, historic_aoh: float, - exponent_func_raster -) -> RasterLayer: - # In theory we could recalc current_aoh, but given we already have it don't duplicate work - # New section added in: Calculating for rasters rather than csv's - - - new_p = ((ConstantLayer(current_aoh) - current) + scenario) / historic_aoh + exponent: str | float +) -> yg.YirgacheffeLayer: + new_aoh = (current_aoh - current) + scenario - const_layer = ConstantLayer(current_aoh) - calc_1 = (const_layer - current) + scenario - new_aoh = RasterLayer.empty_raster_layer_like(current) - calc_1.save(new_aoh) - - calc_2 = (new_aoh / historic_aoh).numpy_apply(exponent_func_raster) - calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) - new_p = RasterLayer.empty_raster_layer_like(new_aoh) - calc_2.save(new_p) + scaled_aoh = new_aoh / historic_aoh + if isinstance(exponent, float): + calc_2 = scaled_aoh ** exponent + else: + assert exponent == "gompetz" + calc_2 = yg.exp(-yg.exp(GOMPERTZ_A + (GOMPERTZ_B * (scaled_aoh ** GOMPERTZ_ALPHA)))) + new_p = yg.where(calc_2 > 1, 1, calc_2) return new_p def global_code_residents_pixel_ae( - species_data_path: str, - current_aohs_path: str, - scenario_aohs_path: str, - historic_aohs_path: str, - exponent: str, - output_folder: str, + taxid: int, + season: str, + current_aohs_path: Path, + scenario_aohs_path: Path, + historic_aohs_path: Path, + exponent: str | float, + output_folder: Path, ) -> None: os.makedirs(output_folder, exist_ok=True) - os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" - try: - filtered_species_info = gpd.read_file(species_data_path) - except: # pylint:disable=W0702 - sys.exit(f"Failed to read {species_data_path}") - taxid = filtered_species_info.id_no.values[0] - season = Season[filtered_species_info.season.values[0]] - - try: - exp_val = float(exponent) - z_exponent_func_float = lambda x: np.float_power(x, exp_val) - z_exponent_func_raster = lambda x: np.float_power(x, exp_val) - except ValueError: - if exponent == "gompertz": - z_exponent_func_float = gen_gompertz - z_exponent_func_raster = numpy_gompertz - else: - sys.exit(f"unrecognised exponent {exponent}") - match season: - case Season.RESIDENT: - filename = f"{taxid}_{season.name}.tif" + case "RESIDENT": + filename = f"{taxid}_{season}.tif" try: - current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) + current, current_aoh = open_layer(current_aohs_path / filename) except FileNotFoundError: - print(f"Failed to open current layer {os.path.join(current_aohs_path, filename)}") - sys.exit() + sys.exit(f"Failed to open current layer {current_aohs_path / filename}") try: - scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename)) + scenario, _ = open_layer(scenario_aohs_path / filename) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario = ConstantLayer(0.0) + scenario = 0.0 try: - historic_aoh = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() + _, historic_aoh = open_layer(historic_aohs_path / filename) except FileNotFoundError: - print(f"Failed to open historic layer {os.path.join(historic_aohs_path, filename)}") - sys.exit() + sys.exit(f"Failed to open historic layer {historic_aohs_path / filename}") if historic_aoh == 0.0: - print(f"Historic AoH for {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH for {taxid} is zero, aborting") - # print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_aoh.sum()}") + old_persistence = calc_persistence_value(current_aoh, historic_aoh, exponent) - layers = [current, scenario] - union = RasterLayer.find_union(layers) + + # In general Yirgacheffe can infer the behaviour needed for area intersections based on + # operator, but in this instance we want to force the caclulation to take place for the + # union of the areas involved. + layers = [x for x in [current, scenario] if isinstance(x, yg.YirgacheffeLayer)] + union = yg.layers.RasterLayer.find_union(layers) for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass + layer.set_window_for_union(union) - current_aoh = current.sum() + new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, exponent) - new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, z_exponent_func_raster) - print(new_p_layer.sum()) + delta_p = new_p_layer - old_persistence - old_persistence = calc_persistence_value(current_aoh, historic_aoh, z_exponent_func_float) - print(old_persistence) - calc = new_p_layer - ConstantLayer(old_persistence) + try: + delta_p.to_geotiff(output_folder / filename) + except ValueError: + sys.exit(f"Failed to align layers for {taxid}_{season}") - with TemporaryDirectory() as tmpdir: - tmpfile = os.path.join(tmpdir, filename) - with RasterLayer.empty_raster_layer_like(new_p_layer, filename=tmpfile) as delta_p: - calc.save(delta_p) - shutil.move(tmpfile, os.path.join(output_folder, filename)) - case Season.NONBREEDING: - nonbreeding_filename = f"{taxid}_{Season.NONBREEDING.name}.tif" - breeding_filename = f"{taxid}_{Season.BREEDING.name}.tif" + case "NONBREEDING": + nonbreeding_filename = f"{taxid}_NONBREEDING.tif" + breeding_filename = f"{taxid}_BREEDING.tif" try: - with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)) as aoh: - historic_aoh_breeding = aoh.sum() + _, historic_aoh_breeding = open_layer(historic_aohs_path / breeding_filename) if historic_aoh_breeding == 0.0: - print(f"Historic AoH breeding for {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH breeding for {taxid} is zero, aborting") except FileNotFoundError: - print(f"Historic AoH for breeding {taxid} not found, aborting") - sys.exit() + sys.exit(f"Historic AoH for breeding {taxid} not found, aborting") try: - with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)) as aoh: - historic_aoh_non_breeding = aoh.sum() + _, historic_aoh_non_breeding = open_layer(historic_aohs_path / nonbreeding_filename) if historic_aoh_non_breeding == 0.0: - print(f"Historic AoH for non breeding {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH for non breeding {taxid} is zero, aborting") except FileNotFoundError: - print(f"Historic AoH for non breeding {taxid} not found, aborting") - sys.exit() + sys.exit(f"Historic AoH for non breeding {taxid} not found, aborting") - - if scenario_aohs_path != "nan": - non_breeding_scenario_path = os.path.join(scenario_aohs_path, nonbreeding_filename) - breeding_scenario_path = os.path.join(scenario_aohs_path, breeding_filename) + if scenario_aohs_path.name != "nan": + non_breeding_scenario_path = scenario_aohs_path / nonbreeding_filename + breeding_scenario_path = scenario_aohs_path / breeding_filename else: non_breeding_scenario_path = "nan" breeding_scenario_path = "nan" try: - current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) + current_breeding, current_aoh_breeding = open_layer(current_aohs_path / breeding_filename) except FileNotFoundError: - print(f"Failed to open current breeding {os.path.join(current_aohs_path, breeding_filename)}") - sys.exit() + sys.exit(f"Failed to open current breeding {current_aohs_path / breeding_filename}") try: - current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) + current_non_breeding, current_aoh_non_breeding = open_layer(current_aohs_path / nonbreeding_filename) except FileNotFoundError: - print(f"Failed to open current non breeding {os.path.join(current_aohs_path, nonbreeding_filename)}") - sys.exit() + sys.exit(f"Failed to open current non breeding {current_aohs_path / nonbreeding_filename}") try: - scenario_breeding = open_layer_as_float64(breeding_scenario_path) + scenario_breeding, _ = open_layer(breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario_breeding = ConstantLayer(0.0) + scenario_breeding = 0.0 try: - scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) + scenario_non_breeding, _ = open_layer(non_breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario_non_breeding = ConstantLayer(0.0) - - layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass + scenario_non_breeding = 0.0 - current_aoh_breeding = current_breeding.sum() persistence_breeding = calc_persistence_value( current_aoh_breeding, historic_aoh_breeding, - z_exponent_func_float + exponent, ) - current_aoh_non_breeding = current_non_breeding.sum() persistence_non_breeding = calc_persistence_value( current_aoh_non_breeding, historic_aoh_non_breeding, - z_exponent_func_float + exponent, ) old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) + # + # In general Yirgacheffe can infer the behaviour needed for area intersections based on + # operator, but in this instance we want to force the caclulation to take place for the + # union of the areas involved. + layers = [x for x in [current_breeding, scenario_breeding, current_non_breeding, scenario_non_breeding] if isinstance(x, yg.YirgacheffeLayer)] + union = yg.layers.RasterLayer.find_union(layers) + for layer in layers: + layer.set_window_for_union(union) + + + new_p_breeding = process_delta_p( current_breeding, scenario_breeding, current_aoh_breeding, historic_aoh_breeding, - z_exponent_func_raster + exponent, ) new_p_non_breeding = process_delta_p( current_non_breeding, scenario_non_breeding, current_aoh_non_breeding, historic_aoh_non_breeding, - z_exponent_func_raster + exponent, ) new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5) - delta_p_layer = new_p_layer - ConstantLayer(old_persistence) + delta_p_layer = new_p_layer - old_persistence - with TemporaryDirectory() as tmpdir: - tmpfile = os.path.join(tmpdir, nonbreeding_filename) - with RasterLayer.empty_raster_layer_like(new_p_breeding, filename=tmpfile) as output: - delta_p_layer.save(output) - shutil.move(tmpfile, os.path.join(output_folder, nonbreeding_filename)) + try: + delta_p_layer.to_geotiff(output_folder / nonbreeding_filename) + except ValueError: + delta_p_layer.pretty_print() + sys.exit(f"Failed to align layers for {taxid}_{season}") - case Season.BREEDING: + case "BREEDING": pass # covered by the nonbreeding case case _: sys.exit(f"Unexpected season for species {taxid}: {season}") +def exponent_type(value: str): + if value == "gompertz": + return value + try: + f = float(value) + except ValueError: + raise argparse.ArgumentTypeError(f"invalid exponent: {value!r}") + if f not in FLOAT_EXPONENTS: + raise argparse.ArgumentTypeError(f"numeric exponent must be one of {sorted(FLOAT_EXPONENTS)}, got {f}") + return f + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( - '--speciesdata', + '--taxid', + type=int, + required=True, + dest='taxid', + help="Species taxon id", + ) + parser.add_argument( + '--season', type=str, - help="Single species/seasonality geojson", required=True, - dest="species_data_path" + dest='season', + help="Seasonality", ) parser.add_argument( '--current_path', - type=str, + type=Path, required=True, dest="current_path", help="path to species current AOH hex" ) parser.add_argument( '--scenario_path', - type=str, + type=Path, required=True, dest="scenario_path", help="path to species scenario AOH hex" ) parser.add_argument( '--historic_path', - type=str, - required=False, + type=Path, + required=True, dest="historic_path", help="path to species historic AOH hex" ) parser.add_argument('--output_path', - type=str, + type=Path, required=True, dest="output_path", help="path to save output csv" ) - parser.add_argument('--z', dest='exponent', type=str, default='0.25') + parser.add_argument( + '--z', + dest='exponent', + type=exponent_type, + default=0.25 + ) args = parser.parse_args() global_code_residents_pixel_ae( - args.species_data_path, + args.taxid, + args.season, args.current_path, args.scenario_path, args.historic_path, diff --git a/local/merge_global_habitat.py b/local/merge_global_habitat.py index 73f247e..0086039 100644 --- a/local/merge_global_habitat.py +++ b/local/merge_global_habitat.py @@ -1,11 +1,14 @@ +""" +This script is used to merge localised raster data into a global raster map. + +Specifically it was used originally to merge crosswalked Brazil Mapbiomass data +into the Jung habitat map. +""" + import argparse from pathlib import Path -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer -import yirgacheffe.operators as yo - -from osgeo import gdal -gdal.SetCacheMax(32 * 1024 * 1024) +import yirgacheffe as yg def merge_global_habitat( global_layer_path: Path, @@ -13,20 +16,15 @@ def merge_global_habitat( output_layer_path: Path, ) -> None: # Note, we assume naively the local data is higher resolution than the global layer for now - with RasterLayer.layer_from_file(local_layer_path) as local_layer: - with RescaledRasterLayer.layer_from_file( - global_layer_path, - pixel_scale=local_layer.pixel_scale - ) as global_layer: - local_layer.set_window_for_union(global_layer.area) - cleared = local_layer.nan_to_num() - combined = yo.where(cleared != 0, local_layer, global_layer) - with RasterLayer.empty_raster_layer_like( - combined, - filename=output_layer_path, - datatype=local_layer.datatype - ) as result: - combined.parallel_save(result, parallelism=200) + # In a better world we'd work out which is higher res and make everything in that pixel scale + with ( + yg.read_raster(local_layer_path) as local_layer, + yg.read_raster_like(global_layer_path, local_layer, yg.ResamplingMethod.Nearest) as global_layer: + ): + local_layer.set_window_for_union(global_layer.area) + cleared = local_layer.nan_to_num() + combined = yg.where(cleared != 0, local_layer, global_layer) + combined.to_geotiff(output_layer_path, parallelism=True) def main() -> None: parser = argparse.ArgumentParser() diff --git a/prepare_layers/make_arable_map.py b/prepare_layers/make_arable_map.py index aeda289..20aa6da 100644 --- a/prepare_layers/make_arable_map.py +++ b/prepare_layers/make_arable_map.py @@ -1,38 +1,44 @@ import argparse +import os +import shutil from pathlib import Path from typing import Optional import yirgacheffe as yg from alive_progress import alive_bar -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) - JUNG_ARABLE_CODE = 1401 JUNG_URBAN_CODE = 1405 def make_arable_map( - current_path: Path, + current_dir_path: Path, output_path: Path, concurrency: Optional[int], show_progress: bool, ) -> None: - with yg.read_raster(current_path) as current: - arable_map = yg.where(current != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, JUNG_URBAN_CODE) + os.makedirs(output_path, exist_ok=True) + + # In this scenario all land that isn't urban is covered to arable + urban_filename = current_dir_path / f"lcc_{JUNG_URBAN_CODE}.tif" + new_arable_filename = output_path / f"lcc_{JUNG_ARABLE_CODE}.tif" + + shutil.copy(urban_filename, output_path) + with yg.read_raster(urban_filename) as urban: + new_arable = 1.0 - urban if show_progress: with alive_bar(manual=True) as bar: - arable_map.to_geotiff(output_path, callback=bar, parallelism=concurrency) + new_arable.to_geotiff(new_arable_filename, callback=bar, parallelism=concurrency) else: - arable_map.to_geotiff(output_path, parallelism=concurrency) + new_arable.to_geotiff(new_arable_filename, parallelism=concurrency) def main() -> None: parser = argparse.ArgumentParser(description="Generate the arable scenario map.") parser.add_argument( '--current', type=Path, - help='Path of current map', + help='Path of fractional current maps', required=True, - dest='current_path', + dest='current_dir_path', ) parser.add_argument( '--output', @@ -60,7 +66,7 @@ def main() -> None: args = parser.parse_args() make_arable_map( - args.current_path, + args.current_dir_path, args.results_path, args.concurrency, args.show_progress, diff --git a/prepare_layers/make_restore_map.py b/prepare_layers/make_restore_map.py index cb8826e..57450cc 100644 --- a/prepare_layers/make_restore_map.py +++ b/prepare_layers/make_restore_map.py @@ -1,15 +1,13 @@ import argparse import itertools +import os +from contextlib import ExitStack, nullcontext from pathlib import Path from typing import Dict, List, Optional import pandas as pd -import yirgacheffe.operators as yo +import yirgacheffe as yg from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) # From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV IUCN_CODE_REPLACEMENTS = [ @@ -30,33 +28,44 @@ def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: result[row.code] = [int(row.value)] return result - def make_restore_map( pnv_path: Path, - current_path: Path, + current_dir_path: Path, crosswalk_path: Path, output_path: Path, - concurrency: Optional[int], + parallelism: Optional[int], show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: - with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: - crosswalk = load_crosswalk_table(crosswalk_path) + os.makedirs(output_path, exist_ok=True) + + crosswalk = load_crosswalk_table(crosswalk_path) + + map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) + ideal_map_replacement_filenames = [current_dir_path / f"lcc_{code}.tif" for code in map_replacement_codes] + map_replacement_filenames = [path for path in ideal_map_replacement_filenames if path.exists()] + + with ExitStack() as stack: + replacement_maps = [stack.enter_context(yg.read_raster(filename)) for filename in map_replacement_filenames] + replacement_total = yg.sum(replacement_maps) - map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) - restore_map = yo.where(current.isin(map_replacement_codes), pnv, current) + # all the ones we expect to be left with, but not the ones we're removing + current_raster_filenames = [path for path in current_dir_path.glob("*.tif") if path not in map_replacement_filenames] - with RasterLayer.empty_raster_layer_like( - restore_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - restore_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - restore_map.parallel_save(result, parallelism=concurrency) + # Read the PNV as the same scale as the other maps + with yg.read_raster_like(pnv_path, replacement_maps[0], yg.ResamplingMethod.Nearest) as pnv: + for filename in current_raster_filenames: + lcc_code = int(filename.stem.split('_')[1]) + ctx = alive_bar(manual=True, title=str(lcc_code)) if show_progress else nullcontext() + with ctx as bar: + with yg.read_raster(filename) as layer: + updated_layer = layer + (replacement_total * (pnv == lcc_code).astype(yg.DataType.Float32)) + capped_updated_layer = yg.where(updated_layer > 1, 1.0, updated_layer) + capped_updated_layer.to_geotiff( + output_path / f"lcc_{lcc_code}.tif", + callback=bar, + parallelism=parallelism, + ) def main() -> None: parser = argparse.ArgumentParser(description="Zenodo resource downloader.") @@ -70,9 +79,9 @@ def main() -> None: parser.add_argument( '--current', type=Path, - help='Path of current map', + help='Path of current maps', required=True, - dest='current_path', + dest='current_dir_path', ) parser.add_argument( '--crosswalk', @@ -91,10 +100,10 @@ def main() -> None: parser.add_argument( '-j', type=int, - help='Number of concurrent threads to use for calculation.', + help='Number of parallel threads to use for calculation.', required=False, default=None, - dest='concurrency', + dest='parallelism', ) parser.add_argument( '-p', @@ -108,10 +117,10 @@ def main() -> None: make_restore_map( args.pnv_path, - args.current_path, + args.current_dir_path, args.crosswalk_path, args.results_path, - args.concurrency, + args.parallelism, args.show_progress, ) diff --git a/requirements.txt b/requirements.txt index 6030a2d..4b1d48b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ requests alive-progress matplotlib yirgacheffe -aoh +aoh>=2.0.5 gdal[numpy] git+https://github.com/quantifyearth/iucn_modlib.git git+https://github.com/quantifyearth/seasonality diff --git a/scripts/run.sh b/scripts/run.sh index 958bd8f..c553de9 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -111,11 +111,6 @@ if [ ! -d "${DATADIR}"/habitat_maps/pnv ]; then --output "${DATADIR}"/habitat_maps/pnv/ fi -# Generate an area scaling map -if [ ! -f "${DATADIR}"/area-per-pixel.tif ]; then - python3 ./prepare_layers/make_area_map.py --scale "${PIXEL_SCALE}" --output "${DATADIR}"/area-per-pixel.tif -fi - # Generate the arable scenario map if check_scenario "arable"; then if [ ! -f "${DATADIR}"/habitat/arable.tif ]; then @@ -132,7 +127,6 @@ if check_scenario "arable"; then if [ ! -f "${DATADIR}"/habitat/arable_diff_area.tif ]; then python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --scenario "${DATADIR}"/habitat/arable.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/arable_diff_area.tif fi @@ -154,7 +148,6 @@ if check_scenario "pasture"; then if [ ! -f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --scenario "${DATADIR}"/habitat/pasture.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/pasture_diff_area.tif fi @@ -178,7 +171,6 @@ if check_scenario "restore"; then if [ ! -f "${DATADIR}"/habitat/restore_diff_area.tif ]; then python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --scenario "${DATADIR}"/habitat/restore.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/restore_diff_area.tif fi @@ -202,7 +194,6 @@ if check_scenario "restore_agriculture"; then if [ ! -f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif fi @@ -226,7 +217,6 @@ if check_scenario "restore_all"; then if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --scenario "${DATADIR}"/habitat/restore_all.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/restore_all_diff_area.tif fi @@ -245,7 +235,6 @@ if check_scenario "urban"; then python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ --habitat_code 14.5 \ --crosswalk "${DATADIR}"/crosswalk.csv \ - --area "${DATADIR}"/area-per-pixel.tif \ --scale "${PIXEL_SCALE}" \ --output "${DATADIR}"/habitat/urban_diff_area.tif fi @@ -289,7 +278,7 @@ python3 ./utils/persistencegenerator.py --datadir "${DATADIR}" \ --scenarios "${SCENARIOS[@]}" # Calculate all the AoHs -littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc -- --force-habitat +littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc -- --force-habitat --pixel-area # Generate validation summaries aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ --output "${DATADIR}"/aohs/current.csv diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 07a871a..d62e024 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -2,9 +2,6 @@ import numpy as np import pytest import yirgacheffe as yg -from yirgacheffe.layers import RasterLayer -from yirgacheffe.operators import DataType -from yirgacheffe.window import Area, PixelScale from prepare_layers.make_food_current_map import TileInfo, process_tile, balance_crop_and_pasture_differences, \ CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover diff --git a/utils/binary_maps.py b/utils/binary_maps.py index f0857b5..2899484 100644 --- a/utils/binary_maps.py +++ b/utils/binary_maps.py @@ -1,45 +1,40 @@ import argparse import os from multiprocessing import cpu_count +from pathlib import Path -import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg -layers = ["all", "AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] -# layers = ["AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] +LAYERS = ["all", "AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] +# LAYERS = ["AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] def binary_maps( - map_path: str, - output_path: str, + map_path: Path, + output_path: Path, parallelism: int ) -> None: - output_dir, filename = os.path.split(output_path) - base, _ext = os.path.splitext(filename) - os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_path.parent, exist_ok=True) - for index, layername in enumerate(layers): - with RasterLayer.layer_from_file(map_path, band=index+1) as inputmap: - with RasterLayer.empty_raster_layer_like( - inputmap, - filename=os.path.join(output_dir, f"{base}_{layername}_binary.tif"), - datatype=gdal.GDT_Int16, - ) as result: - calc = inputmap.numpy_apply(lambda c: np.where(c != 0, c / np.abs(c), 0)) - calc.parallel_save(result, parallelism=parallelism) + for index, layername in enumerate(LAYERS): + with yg.read_raster(map_path, band=index+1) as inputmap: + binary_map = yg.where(inputmap != 0, inputmap / yg.abs(inputmap), 0) + binary_map.astype(yg.DataType.Int16).to_geotiff( + filename.parent / f"{filename.stem}_{layername}_binary.tif", + parallelism=parallelism, + ) def main() -> None: parser = argparse.ArgumentParser(description="Converts the output maps to binary") parser.add_argument( "--input", - type=str, + type=Path, required=True, dest="input_filename", help="multilayer result geotiff" ) parser.add_argument( "--output", - type=str, + type=Path, required=True, dest="output_filename", help="Destination geotiff path." diff --git a/utils/habitat_stats.py b/utils/habitat_stats.py index 9221cf3..0cdb49e 100644 --- a/utils/habitat_stats.py +++ b/utils/habitat_stats.py @@ -4,36 +4,33 @@ from pathlib import Path import pandas as pd -from yirgacheffe.layers import RasterLayer, UniformAreaLayer - -from osgeo import gdal -gdal.SetCacheMax(1024 * 1024 * 16) +import yirgacheffe as yg def habitat_stats( habitats_dir: Path, - area_map_path: Path, output_dir: Path, process_count: int, ) -> None: - with UniformAreaLayer.layer_from_file(area_map_path) as area_raster: - os.makedirs(output_dir, exist_ok=True) - for scenario in Path(habitats_dir).iterdir(): - res = [] - for habitat in scenario.glob("*.tif"): - # Filenames have the form "lcc_200.tif" - we want the IUCN habitat class 2 or 14.1 etc." - basename, _ = os.path.splitext(habitat.name) - jung_habitat_class = int(basename.split("_")[1]) - if jung_habitat_class == 0: - continue - with RasterLayer.layer_from_file(str(habitat)) as raster: - raw_area = raster.parallel_sum(parallelism=process_count) - scaled_area_calc = raster * area_raster - scaled_area = scaled_area_calc.parallel_sum(parallelism=process_count) + os.makedirs(output_dir, exist_ok=True) + for scenario in habitats_dir.iterdir(): + res = [] + for habitat in scenario.glob("*.tif"): + # Filenames have the form "lcc_200.tif" - we want the IUCN habitat class 2 or 14.1 etc." + jung_habitat_class = int(habitat.stem.split("_")[1]) + if jung_habitat_class == 0: + continue + with ( + yg.read_raster(habitat) as raster, + yg.area_raster(raster.map_projection) as area_raster, + ): + raw_area = raster.parallel_sum(parallelism=process_count) + scaled_area_calc = raster * area_raster + scaled_area = scaled_area_calc.parallel_sum(parallelism=process_count) - res.append([jung_habitat_class, raw_area, scaled_area]) - df = pd.DataFrame(res, columns=["habitat", "pixel area", "geo area"]) - df = df.sort_values("habitat") - df.to_csv(output_dir / f"{scenario.name}.csv", index=False) + res.append([jung_habitat_class, raw_area, scaled_area]) + df = pd.DataFrame(res, columns=["habitat", "pixel area", "geo area"]) + df = df.sort_values("habitat") + df.to_csv(output_dir / f"{scenario.name}.csv", index=False) def main() -> None: parser = argparse.ArgumentParser(description="Generate stats on the habitat makeup of each scenario") @@ -43,12 +40,6 @@ def main() -> None: required=True, dest="habitats_dir", ) - parser.add_argument( - "--area", - type=Path, - required=True, - dest="area_map_path", - ) parser.add_argument( "--output", type=Path, @@ -68,7 +59,6 @@ def main() -> None: habitat_stats( args.habitats_dir, - args.area_map_path, args.output_dir, args.process_count, ) diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index a709246..b95bf43 100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -24,11 +24,12 @@ def species_generator( res = [] for taxa in taxas: taxa_path = species_info_dir / taxa / "current" - speciess = list(taxa_path.glob("*.geojson")) + speciess = [x.stem.split('_') for x in taxa_path.glob("*.geojson")] for scenario in scenarios: - for species in speciess: + for taxid, season in speciess: res.append([ - species, + taxid, + season, aohs_path / "current" / taxa, aohs_path / scenario / taxa, aohs_path / "pnv" / taxa, @@ -37,7 +38,8 @@ def species_generator( ]) df = pd.DataFrame(res, columns=[ - '--speciesdata', + '--taxid', + '--season', '--current_path', '--scenario_path', '--historic_path', diff --git a/utils/regression_plot.py b/utils/regression_plot.py index 48894f6..373b0d5 100644 --- a/utils/regression_plot.py +++ b/utils/regression_plot.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import numpy as np -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg def filter_data(chunks): a_chunk, b_chunk = chunks @@ -30,17 +30,19 @@ def regression_plot( output_dir, _ = os.path.split(output_path) os.makedirs(output_dir, exist_ok=True) - with RasterLayer.layer_from_file(a_path) as a_layer: - with RasterLayer.layer_from_file(b_path) as b_layer: - if a_layer.pixel_scale != b_layer.pixel_scale: - sys.exit("GeoTIFFs have different pixel scale") - if a_layer.area != b_layer.area: - sys.exit("GeoTIFFs have different spatial coordinates") - if a_layer.window != b_layer.window: - sys.exit("GeoTIFFs have different pixel dimensions") + with ( + yg.read_raster(a_path) as a_layer, + yg.read_raster(b_path) as b_layer, + ): + if a_layer.pixel_scale != b_layer.pixel_scale: + sys.exit("GeoTIFFs have different pixel scale") + if a_layer.area != b_layer.area: + sys.exit("GeoTIFFs have different spatial coordinates") + if a_layer.window != b_layer.window: + sys.exit("GeoTIFFs have different pixel dimensions") - a_pixels = a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) - b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) + a_pixels = a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) + b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) with Pool(processes=cpu_count() // 2) as pool: filtered_chunk_pairs = pool.map(filter_data, zip(a_pixels, b_pixels)) diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index 85e2808..ccd538d 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -41,17 +41,15 @@ def species_generator( habitat_maps_path, data_dir / "elevation-max.tif", data_dir / "elevation-min.tif", - data_dir / "area-per-pixel.tif", data_dir / "crosswalk.csv", species, aohs_path / scenario / taxa, ]) df = pd.DataFrame(res, columns=[ - '--habitats', + '--fractional_habitats', '--elevation-max', '--elevation-min', - '--area', '--crosswalk', '--speciesdata', '--output' diff --git a/workflow/Snakefile b/workflow/Snakefile index 5e3e2b6..c7e72ad 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -41,3 +41,19 @@ TAXA = config["taxa"] # Include rule modules include: "rules/prepare.smk" include: "rules/habitat.smk" +include: "rules/aoh.smk" +include: "rules/scenario.smk" +include: "rules/species.smk" + + + +rule species_data: +""" +Target: extract species data from PostgreSQL. +""" +input: + DATADIR / "species-info" / "AVES" / ".birdlife_applied", + expand( + str(DATADIR / "species-info" / "{taxa}" / SCENARIO / "report.csv"), + taxa=TAXA, + ), From 318b9ffb9d76b83cb280e56412e3d4c70a9305bb Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 25 Feb 2026 10:29:34 +0000 Subject: [PATCH 07/27] Remove old area map script --- prepare_layers/make_area_map.py | 91 --------------------------------- 1 file changed, 91 deletions(-) delete mode 100644 prepare_layers/make_area_map.py diff --git a/prepare_layers/make_area_map.py b/prepare_layers/make_area_map.py deleted file mode 100644 index 3a2ca29..0000000 --- a/prepare_layers/make_area_map.py +++ /dev/null @@ -1,91 +0,0 @@ -import argparse -import math -from pathlib import Path - -import numpy as np -from yirgacheffe.layers import RasterLayer # type: ignore -from yirgacheffe.operators import DataType # type: ignore -from yirgacheffe.window import Area, PixelScale # type: ignore - -# Taken from https://gis.stackexchange.com/questions/127165/more-accurate-way-to-calculate-area-of-rasters -def area_of_pixel(pixel_size: float, center_lat: float) -> float: - """Calculate m^2 area of a wgs84 square pixel. - - Adapted from: https://gis.stackexchange.com/a/127327/2397 - - Parameters: - pixel_size (float): length of side of pixel in degrees. - center_lat (float): latitude of the center of the pixel. Note this - value +/- half the `pixel-size` must not exceed 90/-90 degrees - latitude or an invalid area will be calculated. - - Returns: - Area of square pixel of side length `pixel_size` centered at - `center_lat` in m^2. - - """ - a = 6378137 # meters - b = 6356752.3142 # meters - e = math.sqrt(1 - (b/a)**2) - area_list = [] - for f in [center_lat+pixel_size/2, center_lat-pixel_size/2]: - zm = 1 - e*math.sin(math.radians(f)) - zp = 1 + e*math.sin(math.radians(f)) - area_list.append( - math.pi * b**2 * ( - math.log(zp/zm) / (2*e) + - math.sin(math.radians(f)) / (zp*zm))) - return pixel_size / 360. * (area_list[0] - area_list[1]) - -def make_area_map( - pixel_scale: float, - output_path: Path, -) -> None: - pixels = [0.0,] * math.floor(90.0 / pixel_scale) - for i in range(len(pixels)): # pylint: disable=C0200 - y = (i + 0.5) * pixel_scale - pixels[i] = area_of_pixel(pixel_scale, y) - - allpixels = np.rot90(np.array([list(reversed(pixels)) + pixels])) - - area = Area( - left=0, - right=pixel_scale, - top=(math.floor(90 / pixel_scale) * pixel_scale), - bottom=(math.floor(90 / pixel_scale) * pixel_scale * -1.0) - ) - with RasterLayer.empty_raster_layer( - area, - PixelScale(pixel_scale, pixel_scale * -1.0), - DataType.Float32, - filename=output_path - ) as res: - assert res.window.xsize == 1 - res._dataset.WriteArray(allpixels, 0, 0) # pylint: disable=W0212 - - -def main() -> None: - parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") - parser.add_argument( - "--scale", - type=float, - required=True, - dest="pixel_scale", - help="Output pixel scale value." - ) - parser.add_argument( - "--output", - type=str, - required=True, - dest="output_path", - help="Destination file for area raster." - ) - args = parser.parse_args() - - make_area_map( - args.pixel_scale, - args.output_path - ) - -if __name__ == "__main__": - main() From 930ac5029f5f3db1e120ade57f726e5b32f21a3d Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 25 Feb 2026 11:03:12 +0000 Subject: [PATCH 08/27] Update run.sh for new fractional maps --- scripts/run.sh | 265 ++++++++++++++++++++++++++++--------------------- 1 file changed, 150 insertions(+), 115 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index c553de9..5557289 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -84,17 +84,31 @@ if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then --output "${DATADIR}"/habitat/ fi - python3 ./prepare_layers/make_current_map.py --jung "${DATADIR}"/habitat/jung_l2_raw.tif \ - --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/current_raw.tif \ - -j 16 -fi + if [ ! -f "${DATADIR}"/100m/current/.sentinel ]; then + python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ + --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ + --crosswalk "${DATADIR}"/crosswalk.csv \ + --output "${DATADIR}"/100m/current \ + --sentinel "${DATADIR}"/100m/current/.sentinel \ + -j 16 + fi -if [ ! -d "${DATADIR}"/habitat_maps/current ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/current_raw.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/current/ + if [ ! -f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/current + for d in "${DATADIR}"/100m/current/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/current/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/current/.sentinel + fi fi # Get PNV layer and prepare for use @@ -105,140 +119,161 @@ if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then --output "${DATADIR}"/habitat/pnv_raw.tif fi -if [ ! -d "${DATADIR}"/habitat_maps/pnv ]; then +if [ ! -d "${DATADIR}"/5_arc_seconds/pnv ]; then aoh-habitat-process --habitat "${DATADIR}"/habitat/pnv_raw.tif \ --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/pnv/ + --output "${DATADIR}"/5_arc_seconds/pnv/ fi # Generate the arable scenario map if check_scenario "arable"; then - if [ ! -f "${DATADIR}"/habitat/arable.tif ]; then - python3 ./prepare_layers/make_arable_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --output "${DATADIR}"/habitat/arable.tif + if [ ! -d "${DATADIR}"/habitat/arable ]; then + python3 ./prepare_layers/make_arable_map.py --current "${DATADIR}"/100m/current \ + --output "${DATADIR}"/100m/arable fi - if [ ! -d "${DATADIR}"/habitat_maps/arable ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/arable.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/arable/ + if [ ! -f "${DATADIR}"/5_arc_seconds/arable/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/arable + for d in "${DATADIR}"/100m/arable/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/arable/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/arable/.sentinel fi if [ ! -f "${DATADIR}"/habitat/arable_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/arable.tif \ - --scale "${PIXEL_SCALE}" \ + python3 ./prepare_layers/make_diff_map_2.py --current "${DATADIR}"/5_arc_seconds/current \ + --scenario "${DATADIR}"/5_arc_seconds/arable \ --output "${DATADIR}"/habitat/arable_diff_area.tif fi fi # Generate the pasture scenario map -if check_scenario "pasture"; then - if [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then - python3 ./prepare_layers/make_pasture_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --output "${DATADIR}"/habitat/pasture.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/pasture ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/pasture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/pasture/ - fi - - if [ ! -f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/pasture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/pasture_diff_area.tif - fi -fi +# if check_scenario "pasture"; then +# if [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then +# python3 ./prepare_layers/make_pasture_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --output "${DATADIR}"/habitat/pasture.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/pasture ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/pasture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/pasture/ +# fi +# +# if [ ! -f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/pasture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/pasture_diff_area.tif +# fi +# fi # Generate the restore map if check_scenario "restore"; then - if [ ! -f "${DATADIR}"/habitat/restore.tif ]; then + if [ ! -d "${DATADIR}"/habitat/restore ]; then python3 ./prepare_layers/make_restore_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ + --current "${DATADIR}"/100m/current \ --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore.tif + --output "${DATADIR}"/100m/restore fi - if [ ! -d "${DATADIR}"/habitat_maps/restore ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore/ + if [ ! -f "${DATADIR}"/5_arc_seconds/restore/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/restore + for d in "${DATADIR}"/100m/restore/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/restore/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/restore/.sentinel fi + if [ ! -f "${DATADIR}"/habitat/restore_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore.tif \ - --scale "${PIXEL_SCALE}" \ + python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/5_arc_seconds/current \ + --scenario "${DATADIR}"/5_arc_seconds/restore \ --output "${DATADIR}"/habitat/restore_diff_area.tif fi fi - -# Generate the restore_agriculture map -if check_scenario "restore_agriculture"; then - if [ ! -f "${DATADIR}"/habitat/restore_agriculture.tif ]; then - python3 ./prepare_layers/make_restore_agriculture_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore_agriculture.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/restore_agriculture ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_agriculture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore_agriculture/ - fi - - if [ ! -f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif - fi -fi - -# Generate the restore all map -if check_scenario "restore_all"; then - if [ ! -f "${DATADIR}"/habitat/restore_all.tif ]; then - python3 ./prepare_layers/make_restore_all_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore_all.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/restore_all ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_all.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore_all/ - fi - - if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore_all.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/restore_all_diff_area.tif - fi -fi - -# Generate urban all map -if check_scenario "urban"; then - if [ ! -d "${DATADIR}"/habitat_maps/urban ]; then - python3 ./prepare_layers/make_constant_habitat.py --examplar "${DATADIR}"/habitat_maps/arable/lcc_1401.tif \ - --habitat_code 14.5 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat_maps/urban - fi - - if [ ! -f "${DATADIR}"/habitat/urban_diff_area.tif ]; then - python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --habitat_code 14.5 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/urban_diff_area.tif - fi -fi +# +# # Generate the restore_agriculture map +# if check_scenario "restore_agriculture"; then +# if [ ! -f "${DATADIR}"/habitat/restore_agriculture.tif ]; then +# python3 ./prepare_layers/make_restore_agriculture_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ +# --current "${DATADIR}"/habitat/current_raw.tif \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat/restore_agriculture.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/restore_agriculture ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_agriculture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/restore_agriculture/ +# fi +# +# if [ ! -f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif +# fi +# fi +# +# # Generate the restore all map +# if check_scenario "restore_all"; then +# if [ ! -f "${DATADIR}"/habitat/restore_all.tif ]; then +# python3 ./prepare_layers/make_restore_all_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ +# --current "${DATADIR}"/habitat/current_raw.tif \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat/restore_all.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/restore_all ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_all.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/restore_all/ +# fi +# +# if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/restore_all.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/restore_all_diff_area.tif +# fi +# fi +# +# # Generate urban all map +# if check_scenario "urban"; then +# if [ ! -d "${DATADIR}"/habitat_maps/urban ]; then +# python3 ./prepare_layers/make_constant_habitat.py --examplar "${DATADIR}"/habitat_maps/arable/lcc_1401.tif \ +# --habitat_code 14.5 \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat_maps/urban +# fi +# +# if [ ! -f "${DATADIR}"/habitat/urban_diff_area.tif ]; then +# python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --habitat_code 14.5 \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/urban_diff_area.tif +# fi +# fi # Fetch and prepare the elevation layers if [ ! -f "${DATADIR}"/elevation.tif ]; then From 60222be127b5507ea1c722c30d69099a73557e1d Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 26 Feb 2026 13:10:18 +0000 Subject: [PATCH 09/27] Improve performance for combine stage --- deltap/global_code_residents_pixel.py | 11 +-- local/merge_global_habitat.py | 2 +- prepare_layers/build_gaez_hyde.py | 47 ++++------ prepare_layers/make_current_map.py | 20 +++-- prepare_layers/make_food_current_map.py | 113 +++++++++++------------- prepare_layers/make_restore_map.py | 4 +- requirements.txt | 2 +- utils/binary_maps.py | 2 +- utils/raster_sum.py | 3 +- 9 files changed, 94 insertions(+), 110 deletions(-) diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py index 572d34e..0dbe88f 100644 --- a/deltap/global_code_residents_pixel.py +++ b/deltap/global_code_residents_pixel.py @@ -2,12 +2,9 @@ import json import math import os -import shutil import sys -from enum import Enum from pathlib import Path -import numpy as np import yirgacheffe as yg # This isn't a hard requirement, but in practice most experiments use 0.25, and the original @@ -35,7 +32,6 @@ def open_layer(filename: Path) -> tuple[yg.YirgacheffeLayer,float]: with open(json_filename, "r", encoding="utf-8") as f: data = json.load(f) total_aoh = data["aoh_total"] - # total_aoh = layer.astype(yg.DataType.Float64).sum() return layer, total_aoh @@ -187,7 +183,8 @@ def global_code_residents_pixel_ae( # In general Yirgacheffe can infer the behaviour needed for area intersections based on # operator, but in this instance we want to force the caclulation to take place for the # union of the areas involved. - layers = [x for x in [current_breeding, scenario_breeding, current_non_breeding, scenario_non_breeding] if isinstance(x, yg.YirgacheffeLayer)] + src_layers = [current_breeding, scenario_breeding, current_non_breeding, scenario_non_breeding] + layers = [x for x in src_layers if isinstance(x, yg.YirgacheffeLayer)] union = yg.layers.RasterLayer.find_union(layers) for layer in layers: layer.set_window_for_union(union) @@ -228,8 +225,8 @@ def exponent_type(value: str): return value try: f = float(value) - except ValueError: - raise argparse.ArgumentTypeError(f"invalid exponent: {value!r}") + except ValueError as exc: + raise argparse.ArgumentTypeError(f"invalid exponent: {value!r}") from exc if f not in FLOAT_EXPONENTS: raise argparse.ArgumentTypeError(f"numeric exponent must be one of {sorted(FLOAT_EXPONENTS)}, got {f}") return f diff --git a/local/merge_global_habitat.py b/local/merge_global_habitat.py index 0086039..f26b523 100644 --- a/local/merge_global_habitat.py +++ b/local/merge_global_habitat.py @@ -19,7 +19,7 @@ def merge_global_habitat( # In a better world we'd work out which is higher res and make everything in that pixel scale with ( yg.read_raster(local_layer_path) as local_layer, - yg.read_raster_like(global_layer_path, local_layer, yg.ResamplingMethod.Nearest) as global_layer: + yg.read_raster_like(global_layer_path, local_layer, yg.ResamplingMethod.Nearest) as global_layer, ): local_layer.set_window_for_union(global_layer.area) cleared = local_layer.nan_to_num() diff --git a/prepare_layers/build_gaez_hyde.py b/prepare_layers/build_gaez_hyde.py index 4812fb7..06f1842 100644 --- a/prepare_layers/build_gaez_hyde.py +++ b/prepare_layers/build_gaez_hyde.py @@ -1,14 +1,10 @@ import argparse -import math import os -import tempfile from pathlib import Path import yirgacheffe as yg from snakemake_argparse_bridge import snakemake_compatible # type: ignore -from make_area_map import make_area_map - DISAGG_CUTOFF = yg.constant(0.95) def build_gaez_hyde( @@ -23,34 +19,29 @@ def build_gaez_hyde( assert gaez.map_projection == hyde.map_projection projection = gaez.map_projection - with tempfile.TemporaryDirectory() as tmpdir: - area_raster = Path(tmpdir) / "area.tif" - make_area_map(math.fabs(projection.ystep), area_raster) - - with yg.read_narrow_raster(area_raster) as area: - - portional_hyde = (hyde.nan_to_num() * 1000000) / area - portional_gaez = gaez / 100.0 + with yg.area_raster(projection) as area: + portional_hyde = (hyde.nan_to_num() * 1000000) / area + portional_gaez = gaez / 100.0 - # where gaez and hyde disagree (sum greater than disagg cutoff), scale down - uncapped_total = portional_gaez + portional_hyde - # NaNs stop warnings about divide by zero - uncapped_total_with_nan = yg.where(uncapped_total == 0.0, float("nan"), uncapped_total) + # where gaez and hyde disagree (sum greater than disagg cutoff), scale down + uncapped_total = portional_gaez + portional_hyde + # NaNs stop warnings about divide by zero + uncapped_total_with_nan = yg.where(uncapped_total == 0.0, float("nan"), uncapped_total) - # calculate ag-perc scalars - total = yg.where( - uncapped_total_with_nan >= DISAGG_CUTOFF, - DISAGG_CUTOFF - (yg.constant(1) / yg.exp(uncapped_total_with_nan * 2)), - uncapped_total_with_nan, - ) + # calculate ag-perc scalars + total = yg.where( + uncapped_total_with_nan >= DISAGG_CUTOFF, + DISAGG_CUTOFF - (yg.constant(1) / yg.exp(uncapped_total_with_nan * 2)), + uncapped_total_with_nan, + ) - gaez_ratio = portional_gaez / uncapped_total_with_nan - gaez_values = total * gaez_ratio - gaez_values.to_geotiff(output_dir_path / "crop.tif") + gaez_ratio = portional_gaez / uncapped_total_with_nan + gaez_values = total * gaez_ratio + gaez_values.to_geotiff(output_dir_path / "crop.tif") - hyde_ratio = portional_hyde / uncapped_total_with_nan - hyde_values = total * hyde_ratio - hyde_values.to_geotiff(output_dir_path / "pasture.tif") + hyde_ratio = portional_hyde / uncapped_total_with_nan + hyde_values = total * hyde_ratio + hyde_values.to_geotiff(output_dir_path / "pasture.tif") @snakemake_compatible(mapping={ "gaez_path": "input.gaez_raster", diff --git a/prepare_layers/make_current_map.py b/prepare_layers/make_current_map.py index 517c167..a650f61 100644 --- a/prepare_layers/make_current_map.py +++ b/prepare_layers/make_current_map.py @@ -1,6 +1,8 @@ import argparse import itertools +import math import os +from contextlib import nullcontext from pathlib import Path from multiprocessing import set_start_method from typing import Dict, List, Optional @@ -35,12 +37,12 @@ def make_current_maps( update_masks_path: Optional[Path], crosswalk_path: Path, output_dir_path: Path, - concurrency: Optional[int], + parallelism: Optional[int], show_progress: bool, sentinel_path: Path | None, ) -> None: os.makedirs(output_dir_path, exist_ok=True) - print(f"Using {concurrency} workers") + print(f"Using {parallelism} workers") if update_masks_path is not None: update_masks = [ @@ -68,16 +70,18 @@ def make_current_maps( vals = current_map.unique() for lcc in vals: + # Seems there are some NaN values in Jung + if math.isnan(lcc): + continue print(f"Processing {lcc}...") per_class = current_map == lcc cast_per_class = per_class.astype(yg.DataType.Float32) - with alive_bar(manual=True) as bar: + ctx = alive_bar(manual=True, title=str(lcc)) if show_progress else nullcontext() + with ctx as bar: cast_per_class.to_geotiff( output_dir_path / f"lcc_{int(lcc)}.tif", callback=bar, - parallelism=concurrency, - nodata=0.0, - sparse=True, + parallelism=parallelism, ) # This script generates a bunch of rasters, but snakemake needs one @@ -133,7 +137,7 @@ def main() -> None: help='Number of concurrent threads to use for calculation.', required=False, default=None, - dest='concurrency', + dest='parallelism', ) parser.add_argument( '-p', @@ -158,7 +162,7 @@ def main() -> None: args.update_masks_path, args.crosswalk_path, args.output_dir_path, - args.concurrency, + args.parallelism, args.show_progress, args.sentinel_path, ) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 2f51f33..0de7bc8 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -12,7 +12,6 @@ import numpy as np import yirgacheffe as yg -from yirgacheffe.layers import RasterLayer from snakemake_argparse_bridge import snakemake_compatible # type: ignore gdal.SetCacheMax(4 * 1024 * 1024 * 1024) @@ -72,8 +71,7 @@ def balance_crop_and_pasture_differences( if src_cells == 0: if crop_diff > 0: raise ValueError(f"not cells in pasture {src_raster.sum()}, but pasture diff is -ve {pasture_diff}") - else: - raise ValueError(f"not cells in crop {src_raster.sum()}, but crop diff is -ve {crop_diff}") + raise ValueError(f"not cells in crop {src_raster.sum()}, but crop diff is -ve {crop_diff}") src_coverage = src_raster.sum() / total_cells @@ -156,13 +154,13 @@ def add_land_cover( lcc_data[eligible_mask & (lcc_data > 0)] -= per_cell_addition def process_tile( - current_maps: dict[int,yg.layers.RasterLayer], - pnv: yg.layers.RasterLayer, + current_maps: dict[int,yg.YirgacheffeLayer], + pnv: yg.YirgacheffeLayer, tile: TileInfo, ) -> dict[int,np.ndarray]: lcc_data_map = { - lcc: current_map.nan_to_num().read_array(tile.x_position, tile.y_position, tile.width, tile.height) + lcc: current_map.read_array(tile.x_position, tile.y_position, tile.width, tile.height) for lcc, current_map in current_maps.items() } @@ -173,8 +171,6 @@ def process_tile( assert current.map_projection == pnv.map_projection assert current.area == pnv.area - pnv_data = None - if not np.isnan(tile.crop_target): crop_data = lcc_data_map[CROP_CODE] crop_diff = tile.crop_target - (crop_data.sum() / crop_data.size) @@ -203,13 +199,14 @@ def process_tile( ] diffs.sort(key=lambda a: a[0]) + pnv_data = None for diff_value, habitat_code in diffs: if diff_value == 0 or np.isnan(diff_value): continue if diff_value < 0: - if pnv is None: - pnv = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) + if pnv_data is None: + pnv_data = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map) else: add_land_cover(habitat_code, diff_value, lcc_data_map) @@ -220,23 +217,23 @@ def process_tile_concurrently( current_lvl1_path: Path, pnv_path: Path, input_queue: Queue, - result_queue: Queue, + result_queues: dict[int,Queue], ) -> None: current_maps = { int(filename.stem.split('_')[1]): yg.read_raster(filename) for filename in current_lvl1_path.glob("lcc_*.tif") } reference_layer = next(iter(current_maps.values())) - with yg.read_raster_like(pnv_path, reference_layer, yg.ResamplingMethod.Nearest) as pnv: + with yg.read_raster(pnv_path) as pnv: pnv.set_window_for_intersection(reference_layer.area) while True: tile : TileInfo | None = input_queue.get() if tile is None: break - res = process_tile(current_maps, pnv, tile) - result_queue.put((tile, {lcc: data.tobytes() for lcc, data in res.items() })) - - result_queue.put(None) + for lcc, data in res.items(): + result_queues[lcc].put((tile, data.tobytes())) + for queue in result_queues.values(): + queue.put(None) def build_tile_list( current_lvl1_path: Path, @@ -245,8 +242,8 @@ def build_tile_list( ) -> list[TileInfo]: tiles = [] - with yg.read_rasters(current_lvl1_path.glob("*.tif")) as current: - current_dimensions = current.window.xsize, current.window.ysize + with yg.read_raster(next(current_lvl1_path.glob("*.tif"))) as example: + current_dimensions = example.window.xsize, example.window.ysize with ( yg.read_raster(crop_adjustment_path) as crop, yg.read_raster(pasture_adjustment_path) as pasture, @@ -277,34 +274,25 @@ def build_tile_list( return tiles def assemble_map( + lcc: int, current_lvl1_path: Path, output_path: Path, result_queue: Queue, sentinal_count: int, ) -> None: os.makedirs(output_path, exist_ok=True) - - original_filenames = list(current_lvl1_path.glob("lcc_*.tif")) - current_maps = { - int(filename.stem.split('_')[1]): yg.read_raster(filename).nan_to_num() for filename in original_filenames - } - new_maps = {} - with yg.read_raster(original_filenames[0]) as example: - for filename in original_filenames: - lcc = int(filename.stem.split('_')[1]) - outputname = output_path / f"lcc_{lcc}.tif" - new_maps[lcc] = RasterLayer.empty_raster_layer_like( - example, - filename=outputname, - nodata=0.0, - threads=16, - sparse=True, - ) - dtype = example.read_array(0, 0, 1, 1).dtype + with yg.read_raster(current_lvl1_path / f"lcc_{lcc}.tif") as current_map: + new_map = yg.layers.RasterLayer.empty_raster_layer_like( + current_map, + filename=output_path / f"lcc_{lcc}.tif", + threads=16, + ) + dtype = current_map.read_array(0, 0, 1, 1).dtype + band = new_map._dataset.GetRasterBand(1) # pylint: disable=W0212 count = 0 while True: - result : tuple[TileInfo,bytearray | None] | None = result_queue.get() + result : tuple[TileInfo,bytearray] | None = result_queue.get() if result is None: sentinal_count -= 1 if sentinal_count == 0: @@ -313,20 +301,11 @@ def assemble_map( count += 1 tile, rawdata = result - if rawdata is None: - for lcc in current_maps.keys(): - data = current_maps[lcc].read_array(tile.x_position, tile.y_position, tile.width, tile.height) - band = new_maps[lcc]._dataset.GetRasterBand(1) - band.WriteArray(data, tile.x_position, tile.y_position) - else: - for lcc in current_maps.keys(): - n = np.frombuffer(rawdata[lcc], dtype=dtype) - data = np.reshape(n, (tile.height, tile.width)) - band = new_maps[lcc]._dataset.GetRasterBand(1) - band.WriteArray(data, tile.x_position, tile.y_position) + n = np.frombuffer(rawdata, dtype=dtype) + data = np.reshape(n, (tile.height, tile.width)) + band.WriteArray(data, tile.x_position, tile.y_position) if count % 1000 == 0: - print(f"assembled {count} tiles") - + print(f"{lcc}: assembled {count} tiles") def pipeline_source( current_lvl1_path: Path, @@ -346,6 +325,11 @@ def pipeline_source( for _ in range(sentinal_count): source_queue.put(None) + +def get_lcc_list(current_lvl1_path: Path) -> list[int]: + rasters = current_lvl1_path.glob("*.tif") + return [int(x.stem.split('_')[1]) for x in rasters] + def make_food_current_map( current_lvl1_path: Path, pnv_path: Path, @@ -361,14 +345,28 @@ def make_food_current_map( os.makedirs(output_path.parent, exist_ok=True) - source_queue = multiprocessing.Queue(maxsize=1000) - result_queue = multiprocessing.Queue(maxsize=1000) + lcc_list = get_lcc_list(current_lvl1_path) + result_queues: dict[int,multiprocessing.queues.Queue] = { lcc: multiprocessing.Queue(maxsize=10) for lcc in lcc_list} + + assembly_processes = [ + Process(target=assemble_map, args=( + lcc, + current_lvl1_path, + output_path, + queue, + processes_count, + )) for lcc, queue in result_queues.items() + ] + for assembly_worker in assembly_processes: + assembly_worker.start() + + source_queue: multiprocessing.queues.Queue = multiprocessing.Queue(maxsize=1000) workers = [Process(target=process_tile_concurrently, args=( current_lvl1_path, pnv_path, source_queue, - result_queue, + result_queues, )) for _ in range(processes_count)] for worker_process in workers: worker_process.start() @@ -382,14 +380,7 @@ def make_food_current_map( )) source_worker.start() - assemble_map( - current_lvl1_path, - output_path, - result_queue, - processes_count, - ) - - processes = workers + processes = workers + assembly_processes processes.append(source_worker) while processes: candidates = [x for x in processes if not x.is_alive()] diff --git a/prepare_layers/make_restore_map.py b/prepare_layers/make_restore_map.py index 57450cc..a273b5f 100644 --- a/prepare_layers/make_restore_map.py +++ b/prepare_layers/make_restore_map.py @@ -49,7 +49,9 @@ def make_restore_map( replacement_total = yg.sum(replacement_maps) # all the ones we expect to be left with, but not the ones we're removing - current_raster_filenames = [path for path in current_dir_path.glob("*.tif") if path not in map_replacement_filenames] + current_raster_filenames = [ + path for path in current_dir_path.glob("*.tif") if path not in map_replacement_filenames + ] # Read the PNV as the same scale as the other maps with yg.read_raster_like(pnv_path, replacement_maps[0], yg.ResamplingMethod.Nearest) as pnv: diff --git a/requirements.txt b/requirements.txt index 4b1d48b..eed4e1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ rasterio requests alive-progress matplotlib -yirgacheffe +yirgacheffe>=1.12.7 aoh>=2.0.5 gdal[numpy] git+https://github.com/quantifyearth/iucn_modlib.git diff --git a/utils/binary_maps.py b/utils/binary_maps.py index 2899484..a79ee51 100644 --- a/utils/binary_maps.py +++ b/utils/binary_maps.py @@ -19,7 +19,7 @@ def binary_maps( with yg.read_raster(map_path, band=index+1) as inputmap: binary_map = yg.where(inputmap != 0, inputmap / yg.abs(inputmap), 0) binary_map.astype(yg.DataType.Int16).to_geotiff( - filename.parent / f"{filename.stem}_{layername}_binary.tif", + output_path.parent / f"{output_path.stem}_{layername}_binary.tif", parallelism=parallelism, ) diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 40b0592..d375863 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -9,8 +9,7 @@ from multiprocessing import Manager, Process, Queue, cpu_count from typing import Optional -from yirgacheffe.layers import RasterLayer # type: ignore -from yirgacheffe.operators import DataType +import yirgacheffe as yg def worker( compress: bool, From 9087be8bc94694fb0d5bff1ef7df8ccae20edac9 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 26 Feb 2026 15:11:42 +0000 Subject: [PATCH 10/27] Update generate food script --- scripts/generate_food_map.sh | 134 +++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 52 deletions(-) diff --git a/scripts/generate_food_map.sh b/scripts/generate_food_map.sh index 9d93a88..ea8cd27 100755 --- a/scripts/generate_food_map.sh +++ b/scripts/generate_food_map.sh @@ -8,6 +8,26 @@ set -e set -x + +# We know we use two Go tools, so add go/bin to our path as in slurm world they're likely +# to be installed locally +export PATH="${PATH}":"${HOME}"/go/bin +if ! hash reclaimer 2>/dev/null; then + echo "Please ensure reclaimer is available" + exit 1 +fi + +# Detect if we're running under SLURM +if [[ -n "${SLURM_JOB_ID}" ]]; then + # Slurm users will probably need to customise this + # shellcheck disable=SC1091 + source "${HOME}"/venvs/life/bin/activate + cd "${HOME}"/dev/life + PROCESS_COUNT="${SLURM_JOB_CPUS_PER_NODE}" +else + PROCESS_COUNT=$(nproc --all) +fi + if [ -z "${DATADIR}" ]; then echo "Please specify $DATADIR" exit 1 @@ -18,79 +38,89 @@ if [ -z "${VIRTUAL_ENV}" ]; then exit 1 fi -if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then - echo "LIFE Level 1 current map required" - exit 1 +if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then + if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then + reclaimer zenodo --zenodo_id 4058819 \ + --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ + --extract \ + --output "${DATADIR}"/habitat/jung_l2_raw.tif + fi + + if [ ! -d "${DATADIR}"/habitat/lvl2_changemasks_ver004 ]; then + reclaimer zenodo --zenodo_id 4058819 \ + --filename lvl2_changemasks_ver004.zip \ + --extract \ + --output "${DATADIR}"/habitat/ + fi + + if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then + python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ + --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ + --crosswalk "${DATADIR}"/crosswalk.csv \ + --output "${DATADIR}"/100m/jung_current \ + --sentinel "${DATADIR}"/100m/jung_current/.sentinel \ + -j "${PROCESS_COUNT}" + fi fi if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then - echo "Jung PNV map required" - exit 1 + reclaimer zenodo --zenodo_id 4038749 \ + --filename pnv_lvl1_004.zip \ + --extract \ + --output "${DATADIR}"/habitat/pnv_raw.tif fi -if [ ! -d "${DATADIR}"/food ]; then - mkdir -p "${DATADIR}"/food +if [ ! -f "${DATADIR}"/habitat/pnv_100m.tif ]; then + # In theory we don't need to do this, as Yirgacheffe can rescale dynamically + # but in practice it's faster if we just do this once like this, at the cost + # of some extra storage requirements + gdal -tr 0.000898315284120 -0.000898315284120 \ + -r near \ + -tap \ + -multi -wo NUM_THREADS=ALL_CPUS \ + -co COMPRESS=LZW -co NUM_THREADS=ALL_CPUS \ + "${DATADIR}"/habitat/pnv_raw.tif + "${DATADIR}"/habitat/pnv_100m.tif fi # Get GAEZ data -if [ ! -f "${DATADIR}"/food/GLCSv11_02_5m.tif ]; then - if [ ! -f "${DATADIR}"/food/LR.zip ]; then - curl -o "${DATADIR}"/food/LR.zip https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip +if [ ! -f "${DATADIR}"/habitat/GLCSv11_02_5m.tif ]; then + if [ ! -f "${DATADIR}"/habitat/LR.zip ]; then + curl -o "${DATADIR}"/habitat/LR.zip https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip fi - unzip -j "${DATADIR}"/food/LR.zip LR/lco/GLCSv11_02_5m.tif -d "${DATADIR}"/food/ + unzip -j "${DATADIR}"/habitat/LR.zip LR/lco/GLCSv11_02_5m.tif -d "${DATADIR}"/habitat/ fi # Get HYDE 3.2 data -if [ ! -f "${DATADIR}"/food/modified_grazing2017AD.asc ]; then - if [ ! -f "${DATADIR}"/food/baseline.zip ]; then - curl -o "${DATADIR}"/food/baseline.zip "https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip" +if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.asc ]; then + if [ ! -f "${DATADIR}"/habitat/baseline.zip ]; then + curl -o "${DATADIR}"/habitat/baseline.zip "https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip" fi - if [ ! -f "${DATADIR}"/food/2017AD_lu.zip ]; then - unzip -j "${DATADIR}"/food/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/food/ + if [ ! -f "${DATADIR}"/habitat/2017AD_lu.zip ]; then + unzip -j "${DATADIR}"/habitat/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/habitat/ fi - if [ ! -f "${DATADIR}"/food/grazing2017AD.asc ]; then - unzip -j "${DATADIR}"/food/2017AD_lu.zip grazing2017AD.asc -d "${DATADIR}"/food/ + if [ ! -f "${DATADIR}"/habitat/grazing2017AD.asc ]; then + unzip -j "${DATADIR}"/habitat/2017AD_lu.zip grazing2017AD.asc -d "${DATADIR}"/habitat/ fi # The pixel scale in the two files have different rounding despite covering the same area # and so this makes them align. - sed "s/0.0833333/0.08333333333333333/" "${DATADIR}"/food/grazing2017AD.asc > "${DATADIR}"/food/modified_grazing2017AD.asc + sed "s/0.0833333/0.08333333333333333/" "${DATADIR}"/habitat/grazing2017AD.asc > "${DATADIR}"/habitat/modified_grazing2017AD.asc fi -if [ ! -f "${DATADIR}"/food/modified_grazing2017AD.prj ]; then - echo 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' > "${DATADIR}"/food/modified_grazing2017AD.prj +if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.prj ]; then + echo 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' > "${DATADIR}"/habitat/modified_grazing2017AD.prj fi -# We need rescaled versions of the current data -if [ ! -d "${DATADIR}"/food/current_layers ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/current_raw.tif \ - --scale 0.08333333333333333 \ - --output "${DATADIR}"/food/current_layers/ +if [ ! -f "${DATADIR}"/habitat/crop.tif ] || [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then + python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/habitat/GLCSv11_02_5m.tif \ + --hyde "${DATADIR}"/habitat/modified_grazing2017AD.asc \ + --output "${DATADIR}"/habitat/ fi -# Combine GAEZ and HYDE data -if [ ! -f "${DATADIR}"/food/crop.tif ] || [ ! -f "${DATADIR}"/food/pasture.tif ]; then - python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \ - --hyde "${DATADIR}"/food/modified_grazing2017AD.asc \ - --output "${DATADIR}"/food/ -fi - -if [ ! -f "${DATADIR}"/food/crop_diff.tif ]; then - python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \ - --output "${DATADIR}"/food/crop_diff.tif -fi - -if [ ! -f "${DATADIR}"/food/pasture_diff.tif ]; then - python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \ - --output "${DATADIR}"/food/pasture_diff.tif -fi - -if [ ! -f "${DATADIR}"/food/current_raw.tif ]; then - python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \ - --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --crop_diff "${DATADIR}"/food/crop_diff.tif \ - --pasture_diff "${DATADIR}"/food/pasture_diff.tif \ - --seed 42 \ - --output "${DATADIR}"/food/current_raw.tif +if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then + python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/100m/jung_current \ + --pnv "${DATADIR}"/habitat/pnv_100m.tif \ + --crop "${DATADIR}"/habitat/crop.tif \ + --pasture "${DATADIR}"/habitat/pasture.tif \ + --output "${DATADIR}"/100m/current fi From 29bd93037c285b2ff91eeaa5d2ca40eaf4242bdf Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 19 Mar 2026 11:23:11 +0000 Subject: [PATCH 11/27] End to end run complete --- deltap/delta_p_scaled.py | 57 +++++++-------- local/merge_global_habitat.py | 16 ++++- prepare_layers/make_diff_map.py | 123 ++++++-------------------------- scripts/run.sh | 15 ++-- utils/raster_sum.py | 114 ++--------------------------- 5 files changed, 80 insertions(+), 245 deletions(-) diff --git a/deltap/delta_p_scaled.py b/deltap/delta_p_scaled.py index 3de0383..bf981ba 100644 --- a/deltap/delta_p_scaled.py +++ b/deltap/delta_p_scaled.py @@ -11,7 +11,7 @@ def delta_p_scaled_area( input_path: Path, diff_area_map_path: Path, - totals_path: Path, + species_totals_path: Path, output_path: Path, ): os.makedirs(output_path.parent, exist_ok=True) @@ -22,46 +22,39 @@ def delta_p_scaled_area( if not per_taxa: sys.exit(f"Failed to find any per-taxa maps in {input_path}") - area_restore = yg.read_raster(diff_area_map_path) + species_total_counts = pd.read_csv(species_totals_path) - total_counts = pd.read_csv(totals_path) + diff_area = yg.read_raster(diff_area_map_path) + diff_area_rescaled = yg.where(diff_area < SCALE, float('nan'), diff_area / SCALE) - area_restore_filter = yg.where(area_restore < SCALE, float('nan'), area_restore / SCALE) - - with yg.layers.RasterLayer.empty_raster_layer_like( - area_restore, - filename=output_path, - nodata=float('nan'), - bands=len(per_taxa) + 1 - ) as result: + # Process all species in total + total_species_count = int(species_total_counts[species_total_counts.taxa=="all"]["count"].values[0]) + summed_layer = yg.sum(per_taxa) + all_scaled_filtered_layer = yg.where( + diff_area_rescaled != 0, + ((summed_layer / diff_area_rescaled) * -1.0) / total_species_count, + float('nan') + ) - species_count = int(total_counts[total_counts.taxa=="all"]["count"].values[0]) + bands = [all_scaled_filtered_layer] + labels = ["all"] - result._dataset.GetRasterBand(1).SetDescription("all") # pylint: disable=W0212 - summed_layer = per_taxa[0] - for layer in per_taxa[1:]: - summed_layer = summed_layer + layer + # Now per taxa + for inlayer in per_taxa: + # get the taxa from the filename + _, name = os.path.split(inlayer.name) + taxa = name[:-4] + labels.append(taxa) + taxa_species_count = int(species_total_counts[species_total_counts.taxa==taxa]["count"].values[0]) scaled_filtered_layer = yg.where( - area_restore_filter != 0, - ((summed_layer / area_restore_filter) * -1.0) / species_count, + diff_area_rescaled != 0, + ((inlayer / diff_area_rescaled) * -1.0) / taxa_species_count, float('nan') ) - scaled_filtered_layer.parallel_save(result, band=1) - - for idx, inlayer in enumerate(per_taxa): - assert inlayer.name - _, name = os.path.split(inlayer.name) - taxa = name[:-4] - species_count = int(total_counts[total_counts.taxa==taxa]["count"].values[0]) - result._dataset.GetRasterBand(idx + 2).SetDescription(taxa) # pylint: disable=W0212 - scaled_filtered_layer = yg.where( - area_restore_filter != 0, - ((inlayer / area_restore_filter) * -1.0) / species_count, - float('nan') - ) - scaled_filtered_layer.parallel_save(result, band=idx + 2) + bands.append(scaled_filtered_layer) + yg.to_geotiff(output_path, bands, labels, nodata=float('nan')) def main() -> None: parser = argparse.ArgumentParser(description="Scale final results for publication.") diff --git a/local/merge_global_habitat.py b/local/merge_global_habitat.py index f26b523..e830dfa 100644 --- a/local/merge_global_habitat.py +++ b/local/merge_global_habitat.py @@ -6,14 +6,17 @@ """ import argparse +from contextlib import nullcontext from pathlib import Path import yirgacheffe as yg +from alive_progress import alive_bar # type: ignore def merge_global_habitat( global_layer_path: Path, local_layer_path: Path, output_layer_path: Path, + show_progress: bool, ) -> None: # Note, we assume naively the local data is higher resolution than the global layer for now # In a better world we'd work out which is higher res and make everything in that pixel scale @@ -24,7 +27,9 @@ def merge_global_habitat( local_layer.set_window_for_union(global_layer.area) cleared = local_layer.nan_to_num() combined = yg.where(cleared != 0, local_layer, global_layer) - combined.to_geotiff(output_layer_path, parallelism=True) + ctx = alive_bar(manual=True, title=str(lcc)) if show_progress else nullcontext() + with ctx as bar: + combined.to_geotiff(output_layer_path, callback=bar, parallelism=True) def main() -> None: parser = argparse.ArgumentParser() @@ -49,12 +54,21 @@ def main() -> None: dest="output_layer_path", help="Result combined raster path", ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) args = parser.parse_args() merge_global_habitat( args.global_layer_path, args.local_layer_path, args.output_layer_path, + args.show_progress, ) if __name__ == "__main__": diff --git a/prepare_layers/make_diff_map.py b/prepare_layers/make_diff_map.py index 6624880..623e591 100644 --- a/prepare_layers/make_diff_map.py +++ b/prepare_layers/make_diff_map.py @@ -1,125 +1,51 @@ import argparse -import shutil -import tempfile from pathlib import Path -from typing import Optional -from alive_progress import alive_bar -from osgeo import gdal -from yirgacheffe.layers import RasterLayer, UniformAreaLayer -from yirgacheffe.operators import DataType +import yirgacheffe as yg -gdal.SetCacheMax(512 * 1024 * 1024) +POSSIBLE_HABITAT_CLASSES = [100, 200, 300, 400, 500, 600, 700, 800, 900, + 1000, 1100, 1200, 1300, 1400, 1401, 1402, 1403, 1404, + 1405, 1406, 1500, 1600, 1800] def make_diff_map( current_path: Path, scenario_path: Path, - area_path: Path, - pixel_scale: float, - target_projection: Optional[str], output_path: Path, - concurrency: Optional[int], + parallelism: None | int, show_progress: bool, ) -> None: - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = Path(tmpdir) - raw_map_filename = tmpdir_path / "raw.tif" - print("comparing:") - with RasterLayer.layer_from_file(current_path) as current: - with RasterLayer.layer_from_file(scenario_path) as scenario: - - diff_map = current != scenario - - gdal.SetCacheMax(512 * 1024 * 1024) - with RasterLayer.empty_raster_layer_like( - diff_map, - filename=raw_map_filename, - datatype=DataType.Float32, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - diff_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - diff_map.parallel_save(result, parallelism=concurrency) - - gdal.SetCacheMax(256 * 1024 * 1024 * 1024) - rescaled_map_filename = tmpdir_path / "rescaled.tif" - print("reprojecting:") - with alive_bar(manual=True) as bar: - gdal.Warp(rescaled_map_filename, raw_map_filename, options=gdal.WarpOptions( - creationOptions=['COMPRESS=LZW', 'NUM_THREADS=16'], - multithread=True, - dstSRS=target_projection, - outputType=gdal.GDT_Float32, - xRes=pixel_scale, - yRes=0.0 - pixel_scale, - resampleAlg="average", - workingType=gdal.GDT_Float32, - callback=lambda a, _b, _c: bar(a), # pylint: disable=E1102 - )) - - print("scaling result:") - with UniformAreaLayer.layer_from_file(area_path) as area_map: - with RasterLayer.layer_from_file(rescaled_map_filename) as diff_map: - - area_adjusted_map_filename = tmpdir_path / "final.tif" - final = area_map * diff_map - gdal.SetCacheMax(512 * 1024 * 1024) - - with RasterLayer.empty_raster_layer_like( - final, - filename=area_adjusted_map_filename, - datatype=gdal.GDT_Float32, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - final.parallel_save(result, callback=bar, parallelism=concurrency) - else: - final.parallel_save(result, parallelism=concurrency) - - shutil.move(area_adjusted_map_filename, output_path) + layers = [] + for habitat in POSSIBLE_HABITAT_CLASSES: + current_habitat_filename = current_path / f"lcc_{habitat}.tif" + scenario_habitat_filename = scenario_path / f"lcc_{habitat}.tif" + if not current_habitat_filename.exists() and not scenario_habitat_filename.exists(): + continue + current_layer = yg.read_raster(current_habitat_filename) if current_habitat_filename.exists() else yg.constant(0.0) + scenario_layer = yg.read_raster(scenario_habitat_filename) if scenario_habitat_filename.exists() else yg.constant(0.0) + habitat_diff = current_layer != scenario_layer + layers.append(habitat_diff) + diff = yg.any(layers) + area = yg.area_raster(diff.map_projection) + scaled_diff = diff * area + scaled_diff.to_geotiff(output_path, parallelism=parallelism) def main() -> None: parser = argparse.ArgumentParser(description="Generate an area difference map.") parser.add_argument( '--current', type=Path, - help='Path of current map', + help='Path of current fractional maps', required=True, dest='current_path', ) parser.add_argument( '--scenario', type=Path, - help='Path of the scenario map', + help='Path of the scenario fractional maps', required=True, dest='scenario_path', ) - parser.add_argument( - '--area', - type=Path, - help='Path of the area per pixel map', - required=True, - dest='area_path', - ) - parser.add_argument( - "--scale", - type=float, - required=True, - dest="pixel_scale", - help="Output pixel scale value." - ) - parser.add_argument( - '--projection', - type=str, - help="Target projection", - required=False, - dest="target_projection", - default=None - ) parser.add_argument( '--output', type=Path, @@ -133,7 +59,7 @@ def main() -> None: help='Number of concurrent threads to use for calculation.', required=False, default=None, - dest='concurrency', + dest='parallelism', ) parser.add_argument( '-p', @@ -148,11 +74,8 @@ def main() -> None: make_diff_map( args.current_path, args.scenario_path, - args.area_path, - args.pixel_scale, - args.target_projection, args.results_path, - args.concurrency, + args.parallelism, args.show_progress, ) diff --git a/scripts/run.sh b/scripts/run.sh index 5557289..88a9fcb 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -69,7 +69,7 @@ if [ ! -f "${DATADIR}"/crosswalk.csv ]; then fi # Get habitat layer and prepare for use -if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then +if [ ! -f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then reclaimer zenodo --zenodo_id 4058819 \ --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ @@ -203,11 +203,16 @@ if check_scenario "restore"; then touch "${DATADIR}"/5_arc_seconds/restore/.sentinel fi + if [ ! -f "${DATADIR}"/100m/restore_diff_area.tif ]; then + python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/100m/current \ + --scenario "${DATADIR}"/100m/restore \ + --output "${DATADIR}"/100m/restore_diff_area.tif + fi - if [ ! -f "${DATADIR}"/habitat/restore_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/5_arc_seconds/current \ - --scenario "${DATADIR}"/5_arc_seconds/restore \ - --output "${DATADIR}"/habitat/restore_diff_area.tif + if [ ! -f "${DATADIR}"/5_arc_seconds/restore_diff_area.tif ]; then + # In theory we should just use gdalwarp here, but it kept running out of + # memory, so we use our own Yirgacheffe version that just does the same thing in small + # chunks. fi fi # diff --git a/utils/raster_sum.py b/utils/raster_sum.py index d375863..ad615c6 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -1,115 +1,24 @@ import argparse import os -import queue import resource -import sys -import tempfile -import time from pathlib import Path -from multiprocessing import Manager, Process, Queue, cpu_count -from typing import Optional import yirgacheffe as yg - -def worker( - compress: bool, - filename: str, - result_dir: Path, - input_queue: Queue, -) -> None: - output_tif = result_dir / filename - - merged_result: Optional[RasterLayer] = None - - while True: - try: - path: Path = input_queue.get_nowait() - except queue.Empty: - break - - with RasterLayer.layer_from_file(path) as partial_raster: - if merged_result is None: - merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=DataType.Float64) - cleaned_raster = partial_raster.nan_to_num(nan=0.0) - cleaned_raster.save(merged_result) - else: - calc = merged_result + partial_raster.nan_to_num(nan=0.0) - temp = RasterLayer.empty_raster_layer_like(calc, datatype=DataType.Float64) - calc.save(temp) - merged_result = temp - - if merged_result: - with RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif, compress=compress) as final: - merged_result.save(final) - input_queue.put(output_tif) +from alive_progress import alive_bar # type: ignore def raster_sum( images_dir: Path, output_filename: Path, - processes_count: int ) -> None: - print(f"process count set to {processes_count}") - + # We'll be opening all the deltap files per taxa in one, so we'll need to raise + # the number of files we can open. _, max_fd_limit = resource.getrlimit(resource.RLIMIT_NOFILE) resource.setrlimit(resource.RLIMIT_NOFILE, (max_fd_limit, max_fd_limit)) - print(f"Set fd limit to {max_fd_limit}") - - os.makedirs(output_filename.parent, exist_ok=True) - - files = list(images_dir.glob("*.tif")) - if not files: - sys.exit(f"No files in {images_dir}, aborting") - print(f"Found {len(files)} images to process") - - with tempfile.TemporaryDirectory() as tempdir: - with Manager() as manager: - source_queue = manager.Queue() - - for file in files: - source_queue.put(file) - - workers = [Process(target=worker, args=( - False, - f"{index}.tif", - Path(tempdir), - source_queue - )) for index in range(processes_count)] - for worker_process in workers: - worker_process.start() - - processes = workers - while processes: - candidates = [x for x in processes if not x.is_alive()] - for candidate in candidates: - candidate.join() - if candidate.exitcode: - for victim in processes: - victim.kill() - sys.exit(candidate.exitcode) - processes.remove(candidate) - time.sleep(0.1) - - # here we should have now a set of images in tempdir to merge - single_worker = Process(target=worker, args=( - True, - output_filename.name, - output_filename.parent, - source_queue - )) - single_worker.start() - - processes = [single_worker] - while processes: - candidates = [x for x in processes if not x.is_alive()] - for candidate in candidates: - candidate.join() - if candidate.exitcode: - for victim in processes: - victim.kill() - sys.exit(candidate.exitcode) - processes.remove(candidate) - time.sleep(1) + layers = [yg.read_raster(x) for x in images_dir.glob("*.tif")] + total = yg.sum(layers) + with alive_bar(manual=True) as bar: + total.to_geotiff(output_filename, callback=bar, parallelism=True) def main() -> None: parser = argparse.ArgumentParser(description="Sums many rasters into a single raster") @@ -127,20 +36,11 @@ def main() -> None: dest="output_filename", help="Destination geotiff file for results." ) - parser.add_argument( - "-j", - type=int, - required=False, - default=round(cpu_count() / 2), - dest="processes_count", - help="Number of concurrent threads to use." - ) args = parser.parse_args() raster_sum( args.rasters_directory, args.output_filename, - args.processes_count ) if __name__ == "__main__": From 0a21f912e03b4ea18795bade12f62e70bfd3604a Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 20 Mar 2026 09:54:44 +0000 Subject: [PATCH 12/27] Fix linter/mypy warnings --- deltap/global_code_residents_pixel.py | 9 +- local/merge_global_habitat.py | 2 +- prepare_layers/make_diff_map.py | 25 ++++-- prepare_layers/make_food_current_map.py | 4 +- tests/test_food_layer.py | 105 ++++++++++++++++-------- utils/footprint_of_humanity.py | 2 +- utils/raster_sum.py | 6 +- 7 files changed, 105 insertions(+), 48 deletions(-) diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py index 0dbe88f..5d5f5da 100644 --- a/deltap/global_code_residents_pixel.py +++ b/deltap/global_code_residents_pixel.py @@ -50,7 +50,7 @@ def calc_persistence_value( def process_delta_p( current: yg.YirgacheffeLayer, - scenario: yg.YirgacheffeLayer, + scenario: yg.YirgacheffeLayer | float, current_aoh: float, historic_aoh: float, exponent: str | float @@ -88,6 +88,7 @@ def global_code_residents_pixel_ae( sys.exit(f"Failed to open current layer {current_aohs_path / filename}") try: + scenario: yg.YirgacheffeLayer | float scenario, _ = open_layer(scenario_aohs_path / filename) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario @@ -143,8 +144,8 @@ def global_code_residents_pixel_ae( non_breeding_scenario_path = scenario_aohs_path / nonbreeding_filename breeding_scenario_path = scenario_aohs_path / breeding_filename else: - non_breeding_scenario_path = "nan" - breeding_scenario_path = "nan" + non_breeding_scenario_path = Path("nan") # nan path is the sentinel from csv inputs + breeding_scenario_path = Path("nan") try: current_breeding, current_aoh_breeding = open_layer(current_aohs_path / breeding_filename) @@ -155,11 +156,13 @@ def global_code_residents_pixel_ae( except FileNotFoundError: sys.exit(f"Failed to open current non breeding {current_aohs_path / nonbreeding_filename}") try: + scenario_breeding: yg.YirgacheffeLayer | float scenario_breeding, _ = open_layer(breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario scenario_breeding = 0.0 try: + scenario_non_breeding: yg.YirgacheffeLayer | float scenario_non_breeding, _ = open_layer(non_breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario diff --git a/local/merge_global_habitat.py b/local/merge_global_habitat.py index e830dfa..aa4d660 100644 --- a/local/merge_global_habitat.py +++ b/local/merge_global_habitat.py @@ -27,7 +27,7 @@ def merge_global_habitat( local_layer.set_window_for_union(global_layer.area) cleared = local_layer.nan_to_num() combined = yg.where(cleared != 0, local_layer, global_layer) - ctx = alive_bar(manual=True, title=str(lcc)) if show_progress else nullcontext() + ctx = alive_bar(manual=True) if show_progress else nullcontext() with ctx as bar: combined.to_geotiff(output_layer_path, callback=bar, parallelism=True) diff --git a/prepare_layers/make_diff_map.py b/prepare_layers/make_diff_map.py index 623e591..e9fe7c6 100644 --- a/prepare_layers/make_diff_map.py +++ b/prepare_layers/make_diff_map.py @@ -1,7 +1,10 @@ import argparse +from contextlib import nullcontext from pathlib import Path import yirgacheffe as yg +from alive_progress import alive_bar # type: ignore +from snakemake_argparse_bridge import snakemake_compatible # type: ignore POSSIBLE_HABITAT_CLASSES = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1401, 1402, 1403, 1404, @@ -21,15 +24,27 @@ def make_diff_map( if not current_habitat_filename.exists() and not scenario_habitat_filename.exists(): continue - current_layer = yg.read_raster(current_habitat_filename) if current_habitat_filename.exists() else yg.constant(0.0) - scenario_layer = yg.read_raster(scenario_habitat_filename) if scenario_habitat_filename.exists() else yg.constant(0.0) + current_layer = yg.read_raster(current_habitat_filename) if current_habitat_filename.exists() \ + else yg.constant(0.0) + scenario_layer = yg.read_raster(scenario_habitat_filename) if scenario_habitat_filename.exists() \ + else yg.constant(0.0) habitat_diff = current_layer != scenario_layer layers.append(habitat_diff) + diff = yg.any(layers) area = yg.area_raster(diff.map_projection) scaled_diff = diff * area - scaled_diff.to_geotiff(output_path, parallelism=parallelism) + ctx = alive_bar(manual=True) if show_progress else nullcontext() + with ctx as bar: + scaled_diff.to_geotiff(output_path, callback=bar, parallelism=parallelism) + +@snakemake_compatible(mapping={ + "current_path": "input.current", + "scenario_path": "params.scenario", + "parallelism": "threads", + "output_path": "output[0]", +}) def main() -> None: parser = argparse.ArgumentParser(description="Generate an area difference map.") parser.add_argument( @@ -51,7 +66,7 @@ def main() -> None: type=Path, help='Path where final map should be stored', required=True, - dest='results_path', + dest='output_path', ) parser.add_argument( '-j', @@ -74,7 +89,7 @@ def main() -> None: make_diff_map( args.current_path, args.scenario_path, - args.results_path, + args.output_path, args.parallelism, args.show_progress, ) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 0de7bc8..8036c06 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -346,7 +346,9 @@ def make_food_current_map( os.makedirs(output_path.parent, exist_ok=True) lcc_list = get_lcc_list(current_lvl1_path) - result_queues: dict[int,multiprocessing.queues.Queue] = { lcc: multiprocessing.Queue(maxsize=10) for lcc in lcc_list} + result_queues: dict[int,multiprocessing.queues.Queue] = { + lcc: multiprocessing.Queue(maxsize=10) for lcc in lcc_list + } assembly_processes = [ Process(target=assemble_map, args=( diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index d62e024..a44481e 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -1,20 +1,27 @@ import numpy as np import pytest -import yirgacheffe as yg -from prepare_layers.make_food_current_map import TileInfo, process_tile, balance_crop_and_pasture_differences, \ +from prepare_layers.make_food_current_map import balance_crop_and_pasture_differences, \ CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover -@pytest.mark.parametrize("initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff", [ - (0.0, 0.0, 0.0, 0.0), - (0.8, 0.0, 0.8, 0.0), - (-0.8, 0.0, -0.8, 0.0), - (0.0, 0.8, 0.0, 0.8), - (0.0, -0.8, 0.0, -0.8), - (0.4, 0.2, 0.4, 0.2), - (-0.4, -0.2, -0.4, -0.2), -]) +@pytest.mark.parametrize( + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff" + ], + [ + (0.0, 0.0, 0.0, 0.0), + (0.8, 0.0, 0.8, 0.0), + (-0.8, 0.0, -0.8, 0.0), + (0.0, 0.8, 0.0, 0.8), + (0.0, -0.8, 0.0, -0.8), + (0.4, 0.2, 0.4, 0.2), + (-0.4, -0.2, -0.4, -0.2), + ] +) def test_balance_not_needed( initial_crop_diff: float, initial_pasture_diff: float, @@ -24,30 +31,35 @@ def test_balance_not_needed( result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( initial_crop_diff, initial_pasture_diff, - dict(), + {}, ) assert expected_crop_diff == result_crop_diff assert expected_pasture_diff == result_pasture_diff -def test_one_sided_balance() -> None: +def test_brokend_balance() -> None: + # Testing that we spot if we've said to remove land where there isn't any lcc_data_map = { CROP_CODE: np.zeros((10, 10)), PASTURE_CODE: np.zeros((10, 10)), } - result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( - 0.5, - -0.1, - lcc_data_map, - ) - # assert expected_crop_diff == result_crop_diff - # assert expected_pasture_diff == result_pasture_diff - # assert (lcc_data_map[CROP_CODE] == crop_cell_value).all() - # assert (lcc_data_map[PASTURE_CODE] == pasture_cell_value).all() + with pytest.raises(ValueError): + _ = balance_crop_and_pasture_differences( + 0.5, + -0.1, + lcc_data_map, + ) @pytest.mark.parametrize( - "initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff,crop_cell_value,pasture_cell_value", + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff", + "crop_cell_value", + "pasture_cell_value" + ], [ (0.25, -0.5, 0.0, -0.25, 0.25, 0.75), ] @@ -77,7 +89,14 @@ def test_simple_balance_transfer( @pytest.mark.parametrize( - "initial_crop_diff,initial_pasture_diff,expected_crop_diff,expected_pasture_diff,crop_cell_value,pasture_cell_value", + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff", + "crop_cell_value", + "pasture_cell_value" + ], [ (0.25, -0.5, 0.0, -0.25, 0.5, 0.5), (0.5, -0.25, 0.25, 0.0, 0.5, 0.5), @@ -110,9 +129,16 @@ def test_simple_half_balance_transfer( assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() -@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ - (-0.5, 0.5, 0.5), -]) +@pytest.mark.parametrize( + [ + "crop_diff", + "expected_crop_cell", + "expected_other_cell", + ], + [ + (-0.5, 0.5, 0.5), + ] +) def test_remove_land_simple( crop_diff: float, expected_crop_cell: float, @@ -138,13 +164,22 @@ def test_remove_land_simple( assert (expected_other_map == lcc_data_map[1]).all() -@pytest.mark.parametrize("crop_diff,pnv_value,expected_crop_cell,expected_other_1_cell,expected_other_2_cell", [ - (-0.5, 1, 0.0, 1.0, 0.0), - (-0.75, 1, 0.0, 1.0, 0.0), # too much - (-0.25, 1, 0.5, 0.5, 0.0), - (-0.5, 2, 0.0, 0.0, 1.0), - (-0.25, 2, 0.5, 0.0, 0.5), -]) +@pytest.mark.parametrize( + [ + "crop_diff", + "pnv_value", + "expected_crop_cell", + "expected_other_1_cell", + "expected_other_2_cell", + ], + [ + (-0.5, 1, 0.0, 1.0, 0.0), + (-0.75, 1, 0.0, 1.0, 0.0), # too much + (-0.25, 1, 0.5, 0.5, 0.0), + (-0.5, 2, 0.0, 0.0, 1.0), + (-0.25, 2, 0.5, 0.0, 0.5), + ] +) def test_remove_land_less_simple( crop_diff: float, pnv_value: int, @@ -189,7 +224,6 @@ def test_add_land_simple( 1: np.ones((10, 10)), CROP_CODE: np.zeros((10, 10)), } - pnv_data = np.full((10, 10), 1) add_land_cover( CROP_CODE, @@ -217,7 +251,6 @@ def test_add_land_avoid_excluded( PASTURE_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), CROP_CODE: np.zeros((10, 10)), } - pnv_data = np.full((10, 10), 1) add_land_cover( CROP_CODE, diff --git a/utils/footprint_of_humanity.py b/utils/footprint_of_humanity.py index 7399833..77d60a6 100644 --- a/utils/footprint_of_humanity.py +++ b/utils/footprint_of_humanity.py @@ -24,7 +24,7 @@ def absolute( merge1 = pd.merge(current_cleaned, pnv_cleaned, on=["id_no", "season"], suffixes=["_current", "_pnv"]) merge2 = pd.merge(merge1, scenario, on=["id_no", "season"], how="left", indicator=True) merged = merge2[["id_no", "season", "class_name", "aoh_total_current", "aoh_total_pnv", "aoh_total", "_merge"]].copy() - merged.aoh_total = merged.aoh_total.fillna(0) + merged["aoh_total"] = merged.aoh_total.fillna(0) merged.rename(columns={"aoh_total": "aoh_total_scenario"}, inplace=True) merged["current_persistence"] = (merged.aoh_total_current / merged.aoh_total_pnv) ** 0.25 diff --git a/utils/raster_sum.py b/utils/raster_sum.py index ad615c6..2b57ef6 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -1,10 +1,10 @@ import argparse -import os import resource from pathlib import Path import yirgacheffe as yg from alive_progress import alive_bar # type: ignore +from snakemake_argparse_bridge import snakemake_compatible # type: ignore def raster_sum( images_dir: Path, @@ -20,6 +20,10 @@ def raster_sum( with alive_bar(manual=True) as bar: total.to_geotiff(output_filename, callback=bar, parallelism=True) +@snakemake_compatible(mapping={ + "rasters_directory": "input.rasters", + "output_filename": "output[0]", +}) def main() -> None: parser = argparse.ArgumentParser(description="Sums many rasters into a single raster") parser.add_argument( From 7fefa8b426c8e6adfb56002a3a8eafbc4c616370 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 17 Apr 2026 09:44:31 +0100 Subject: [PATCH 13/27] WiP snakemake improvements --- README.md | 84 +++++++++++++- config/config.yaml | 18 +++ workflow/Snakefile | 158 ++++++++++++++++++++++++--- workflow/rules/habitat.smk | 217 +++++++++++++++++++++++++++++++------ workflow/rules/prepare.smk | 126 ++++++++++++++++----- 5 files changed, 525 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index bffd518..bf6c58d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,89 @@ This repository implements the LIFE extinction risk methodology as published in ## Running the code -The methodology is explained in more detail in [method.md](method.md), but there is also a script set up to just do an entire run of the pipeline in `./scripts/run.sh`. +The methodology is explained in more detail in [method.md](method.md). The pipeline is implemented as a [Snakemake](https://snakemake.readthedocs.io/) workflow in `workflow/`. + +### Prerequisites + +The following tools must be available on `PATH`: + +- `reclaimer` — downloads inputs from Zenodo ([quantifyearth/reclaimer](https://github.com/quantifyearth/reclaimer)) +- `aoh-calc`, `aoh-habitat-process`, `aoh-collate-data`, `aoh-species-richness`, `aoh-endemism`, `aoh-validate-prevalence` — from the `aoh` Python package +- `gdalwarp` — part of GDAL + +The following environment variables must be set: + +| Variable | Purpose | +|----------|---------| +| `DATADIR` | Root directory for all input/output data | +| `DB_HOST`, `DB_NAME`, `DB_USER`, `DB_PASSWORD` | PostgreSQL credentials for IUCN species database | + +For GBIF occurrence validation (optional): + +| Variable | Purpose | +|----------|---------| +| `GBIF_USERNAME`, `GBIF_EMAIL`, `GBIF_PASSWORD` | GBIF account credentials | + +### Running the pipeline + +```bash +# Full pipeline: delta P maps for all scenarios + model validation +snakemake --cores N all + +# Delta P maps only, without validation +snakemake --cores N life + +# Species richness and endemism maps (not included in 'all') +snakemake --cores N summaries + +# Model validation only +snakemake --cores N validation + +# GBIF occurrence validation (expensive — downloads gigabytes of data) +snakemake --cores N occurrence_validation +``` + +Other useful targets for incremental runs: + +```bash +snakemake --cores N prepare # Download and warp all base layers +snakemake --cores N species_data # Extract species data from the database +snakemake --cores N aohs # Generate all AOH rasters and collated CSVs +``` + +### Precious layers + +The habitat and elevation layers are expensive to generate and are treated as **precious**: they will only be rebuilt if their output files are explicitly deleted, even if upstream inputs or code have changed. This applies to: + +- `habitat_layers/current/` — food-enhanced current habitat map at ~1.67km resolution +- `habitat_layers/{scenario}/` — per-scenario fractional habitat maps +- `habitat_layers/pnv/` — potential natural vegetation fractional maps +- `elevation-max.tif`, `elevation-min.tif` — warped elevation layers + +To force a rebuild of any of these, delete the relevant `.sentinel` file (or the output file itself) before re-running. + +### Configuration + +Pipeline parameters are in `config/config.yaml`: + +- `taxa` — taxonomic classes to process (default: AMPHIBIA, AVES, MAMMALIA, REPTILIA) +- `scenarios` — habitat change scenarios to evaluate (default: arable, restore) +- `curve` — extinction curve exponent for delta P (default: `"0.25"`) +- `pixel_scale` — output raster resolution in degrees (default: ~5 arc-seconds) + +### Inspecting the pipeline graph + +The rule graph can be generated with: + +```bash +snakemake --forceall --rulegraph 2>/dev/null | dot -Tsvg > graph.svg +``` + +Note that rules whose inputs are determined by the species-extraction checkpoint (`generate_aoh`, `calculate_delta_p`) will not appear in the static rule graph — this is a known snakemake limitation with checkpoint-based expansion. To see the complete job graph after species data has been extracted: + +```bash +snakemake --forceall --dag 2>/dev/null | unflatten -f -l 5 | dot -Tsvg > dag.svg +``` ## Credits diff --git a/config/config.yaml b/config/config.yaml index 99ec52c..c260e71 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -15,6 +15,17 @@ pixel_scale: 0.016666666666667 hyde_projection: 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' hyde_pixel_scale: 0.08333333333333333 +# Scenarios to run +scenarios: + - arable + - restore + +# Z-curve value for delta P calculation +curve: "0.25" + +# Projection for species data extraction +projection: "EPSG:4326" + # Zenodo configuration for downloading raw habitat zenodo: jung_habitat: @@ -26,3 +37,10 @@ zenodo: jung_pnv: zenodo_id: 4038749 filename: "pnv_lvl1_004.zip" + elevation: + zenodo_id: 5719984 + filename: "dem-100m-esri54017.tif" + +# Optional inputs +optional_inputs: + species_overrides: "overrides.csv" diff --git a/workflow/Snakefile b/workflow/Snakefile index c7e72ad..0780aef 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -1,14 +1,15 @@ # LIFE Pipeline - Snakemake Workflow # =================================== # -# This workflow implements the LIFE biodivesity metric methodology. -# It processes species data, generates Area of Habitat (AoH) rasters, and -# produces the different scenario output rasters +# This workflow implements the LIFE biodiversity metric methodology. +# It processes species data, generates Area of Habitat (AoH) rasters, and +# produces the different scenario output rasters. # # Usage: # snakemake --cores N all -# snakemake --cores N validation # snakemake --cores N life +# snakemake --cores N summaries +# snakemake --cores N validation # # Environment variables required: # DATADIR - Directory for input/output data @@ -26,34 +27,159 @@ os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" # Source directory (where this repo lives) SRCDIR = Path(workflow.basedir).parent - # Load configuration configfile: SRCDIR / "config/config.yaml" - # Data directory from environment variable DATADIR = Path(os.environ.get("DATADIR", "/data")) # Taxa list from config TAXA = config["taxa"] +# Scenarios from config +SCENARIOS = config["scenarios"] + +# Z-curve value (single value, not a wildcard) +CURVE = config["curve"] + +# All scenarios used for AOH generation +ALL_AOH_SCENARIOS = SCENARIOS + ["current", "pnv"] + + +# ============================================================================= +# Utility Functions for Checkpoint-Based Expansion +# ============================================================================= +# These must be defined before the includes so they are available when +# the rule modules are parsed. + + +def get_species_era(scenario): + """Return the species era (current/historic) for a given scenario.""" + return "historic" if scenario == "pnv" else "current" + + +def get_species_ids_for_taxa_scenario(wildcards): + """ + Return species IDs for a given taxa and scenario. + Uses the extract_species_data checkpoint output. + """ + era = get_species_era(wildcards.scenario) + checkpoint_output = checkpoints.extract_species_data.get(taxa=wildcards.taxa).output + if era == "current": + report_path = Path(str(checkpoint_output.current_report)) + else: + report_path = Path(str(checkpoint_output.historic_report)) + geojson_dir = report_path.parent + return [p.stem for p in sorted(geojson_dir.glob("*.geojson"))] + + +def get_all_aoh_metadata_for_taxa_scenario(wildcards): + """ + Return paths to all AOH metadata JSON files for a taxa and scenario. + """ + species_ids = get_species_ids_for_taxa_scenario(wildcards) + return [ + DATADIR / "aohs" / wildcards.scenario / wildcards.taxa / f"aoh_{sid}.json" + for sid in species_ids + ] + + +def get_species_ids_for_delta_p(wildcards): + """Return species IDs for delta P (always from current/ era).""" + checkpoint_output = checkpoints.extract_species_data.get(taxa=wildcards.taxa).output + geojson_dir = Path(str(checkpoint_output.current_report)).parent + return [p.stem for p in sorted(geojson_dir.glob("*.geojson"))] + + +def get_delta_p_sentinels_for_taxa_scenario(wildcards): + """Return all delta P sentinel files for a taxa and scenario.""" + species_ids = get_species_ids_for_delta_p(wildcards) + return [ + DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa / f".{sid}.done" + for sid in species_ids + ] + # Include rule modules include: "rules/prepare.smk" include: "rules/habitat.smk" -include: "rules/aoh.smk" include: "rules/scenario.smk" include: "rules/species.smk" +include: "rules/aoh.smk" +include: "rules/delta_p.smk" +include: "rules/validation.smk" + + +# ============================================================================= +# Target Rules +# ============================================================================= + + +rule all: + """Default target: full LIFE pipeline + model validation.""" + input: + expand( + str(DATADIR / "deltap_final" / "scaled_{scenario}_{curve}.tif"), + scenario=SCENARIOS, + curve=[CURVE], + ), + DATADIR / "validation" / "model_validation.csv", + +rule life: + """Target: run the LIFE pipeline without validation.""" + input: + expand( + str(DATADIR / "deltap_final" / "scaled_{scenario}_{curve}.tif"), + scenario=SCENARIOS, + curve=[CURVE], + ), + + +rule summaries: + """ + Target: generate species richness and endemism maps. + NOT included in 'all' — run explicitly with: snakemake summaries + """ + input: + DATADIR / "summaries" / "species_richness.tif", + DATADIR / "summaries" / "endemism.tif", + + +rule aohs: + """Target: generate all AOH rasters and collated summary CSVs.""" + input: + expand( + str(DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete"), + scenario=ALL_AOH_SCENARIOS, + taxa=TAXA, + ), + expand( + str(DATADIR / "aohs" / "{scenario}.csv"), + scenario=ALL_AOH_SCENARIOS, + ), rule species_data: -""" -Target: extract species data from PostgreSQL. -""" -input: - DATADIR / "species-info" / "AVES" / ".birdlife_applied", - expand( - str(DATADIR / "species-info" / "{taxa}" / SCENARIO / "report.csv"), - taxa=TAXA, - ), + """Target: extract species data from PostgreSQL.""" + input: + expand( + str(DATADIR / "species-info" / "{taxa}" / "current" / "report.csv"), + taxa=TAXA, + ), + + +rule prepare: + """Target: prepare all base layers.""" + input: + DATADIR / "crosswalk.csv", + DATADIR / "habitat_layers" / "current" / ".sentinel", + DATADIR / "habitat_layers" / "pnv" / ".sentinel", + DATADIR / "elevation-max.tif", + DATADIR / "elevation-min.tif", + + +rule validation: + """Target: run model validation.""" + input: + DATADIR / "validation" / "model_validation.csv", diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index 35d7f49..d4313f6 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -1,14 +1,28 @@ -# LIFE Pipeline - Updated habitat map rules -# ========================================== +# LIFE Pipeline - Habitat layer rules +# ===================================== # -# These rules handle combining the Jung habitat map with -# farming data from GAEZ and Hyde to generate a more suitable -# base map. +# Handles the two-phase habitat map generation: +# +# Phase 1 (generate_food_map.sh equivalent): +# - Download GAEZ and HYDE data +# - Combine GAEZ/HYDE into crop/pasture fractional layers +# - Build Jung current map at 100m +# - Rescale PNV to 100m +# - Build food-enhanced current map at 100m (PRECIOUS) +# +# Phase 2 (run.sh equivalent): +# - Warp current 100m → habitat_layers/current/ (PRECIOUS) +# - Process PNV → habitat_layers/pnv/ via aoh-habitat-process (PRECIOUS) + + +import os +from pathlib import Path # ============================================================================= # GAEZ data # ============================================================================= + rule gaez_download: """ Fetch the compressed GAEZ data. @@ -21,6 +35,7 @@ rule gaez_download: DATADIR / "logs" / "download_gaez.log", shell: """ + mkdir -p $(dirname {output.archive}) curl -o {output.archive} \ {params.url} \ 2>&1 | tee {log} @@ -47,14 +62,12 @@ rule gaez_expand: 2>&1 | tee {log} """ - # ============================================================================= # Hyde data # ============================================================================= + rule hyde_download: - """ - Fetch the compressed Hyde data. - """ + """Fetch the compressed HYDE data.""" output: archive=DATADIR / "food" / "baseline.zip" params: @@ -63,6 +76,7 @@ rule hyde_download: DATADIR / "logs" / "download_hyde.log", shell: """ + mkdir -p $(dirname {output.archive}) curl -o {output.archive} \ {params.url} \ 2>&1 | tee {log} @@ -70,15 +84,13 @@ rule hyde_download: rule hyde_expand_land_usage_archive: - """ - Extract the inner land usage archive. - """ + """Extract the inner land usage archive from HYDE.""" + input: + archive=DATADIR / "food" / "baseline.zip" output: inner_hyde=DATADIR / "food" / "2017AD_lu.zip" params: filepath="baseline/zip/2017AD_lu.zip" - input: - archive=DATADIR / "food" / "baseline.zip" log: DATADIR / "logs" / "expand_hyde_1.log", shell: @@ -91,15 +103,13 @@ rule hyde_expand_land_usage_archive: rule hyde_expand_land_usage_raster: - """ - Extract raster from the inner land usage archive. - """ + """Extract raster from the inner HYDE land usage archive.""" + input: + archive=DATADIR / "food" / "2017AD_lu.zip" output: raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" params: filepath="grazing2017AD.asc" - input: - archive=DATADIR / "food" / "2017AD_lu.zip" log: DATADIR / "logs" / "expand_hyde_2.log", shell: @@ -113,29 +123,32 @@ rule hyde_expand_land_usage_raster: rule modify_hyde_pixel_scale: """ - Modify the pixel scale value in the Hyde data as it isn't precise enough to match the GAEZ data. + Fix the pixel scale value in the HYDE data — the rounding is not precise + enough to align with the GAEZ data. """ - output: - modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc" input: raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + output: + modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc" shell: """ - sed "s/0.0833333/0.08333333333333333/" {input.raw_hyde_raster} > {output.modified_hyde_raster} + sed "s/0.0833333/0.08333333333333333/" \ + {input.raw_hyde_raster} > {output.modified_hyde_raster} """ rule add_hyde_projection_info: """ - The Hyde data ships without a projection specified, so we need to add one so that - the rest of the workflow doesn't complain when we try to mix it in with other projected - raster data. + The HYDE data ships without a projection, so add one so that + the rest of the workflow can mix it with projected raster data. """ output: hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj" + params: + projection=config["hyde_projection"] shell: """ - echo {config["hyde_projection"]} > {output.hyde_projection_file} + echo '{params.projection}' > {output.hyde_projection_file} """ @@ -154,26 +167,158 @@ rule combine_gaez_hyde: crop=DATADIR / "food" / "crop.tif", pasture=DATADIR / "food" / "pasture.tif" params: - output_dir=DATADIR / "food" + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", + output_dir=DATADIR / "100m" / "jung_current", + output: + sentinel=DATADIR / "100m" / "jung_current" / ".sentinel", + threads: workflow.cores script: - str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") + str(SRCDIR / "prepare_layers" / "make_current_map.py") + + +# ============================================================================= +# PNV rescaled to 100m +# ============================================================================= + +rule pnv_100m: + """ + Rescale the PNV map to 100m resolution. + Yirgacheffe can rescale dynamically, but pre-scaling is faster at the cost + of some extra disk space. + """ + input: + pnv=DATADIR / "habitat" / "pnv_raw.tif", + output: + pnv_100m=DATADIR / "habitat" / "pnv_100m.tif", + log: + DATADIR / "logs" / "pnv_100m.log", + shell: + """ + gdalwarp \ + -t_srs EPSG:4326 \ + -tr 0.000898315284120 -0.000898315284120 \ + -r near \ + -tap \ + -multi -wo NUM_THREADS=ALL_CPUS \ + -co COMPRESS=LZW -co NUM_THREADS=ALL_CPUS \ + {input.pnv} \ + {output.pnv_100m} \ + 2>&1 | tee {log} + """ # ============================================================================= -# Combine Jung, Hyde, and GAEZ +# Food-enhanced current map at 100m (PRECIOUS) # ============================================================================= rule build_food_map: """ - Pull together all the pieces into a new L1 habitat map at the original Jung scale. + Build the food-enhanced current habitat map at 100m resolution by combining + the Jung current map with GAEZ/HYDE crop and pasture fractions. + + PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. + This is the most expensive step in the pipeline. """ input: + jung=ancient(DATADIR / "100m" / "jung_current" / ".sentinel"), + pnv=ancient(DATADIR / "habitat" / "pnv_100m.tif"), + crop=ancient(DATADIR / "food" / "crop.tif"), + pasture=ancient(DATADIR / "food" / "pasture.tif"), + output: + sentinel=DATADIR / "100m" / "current" / ".sentinel", + params: + jung_dir=DATADIR / "100m" / "jung_current", + output_dir=DATADIR / "100m" / "current", + pnv=DATADIR / "habitat" / "pnv_100m.tif", crop=DATADIR / "food" / "crop.tif", pasture=DATADIR / "food" / "pasture.tif", - pnv=DATADIR / "habitat" / "pnv_raw.tif", - jung=DATADIR / "habitat" / "current_raw.tif", + threads: workflow.cores + log: + DATADIR / "logs" / "build_food_map.log", + shell: + """ + python3 {SRCDIR}/prepare_layers/make_food_current_map.py \ + --current_lvl1 {params.jung_dir} \ + --pnv {params.pnv} \ + --crop {params.crop} \ + --pasture {params.pasture} \ + --output {params.output_dir} \ + -j {threads} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ + + +# ============================================================================= +# Warp current map to habitat_layers resolution (PRECIOUS) +# ============================================================================= + +rule warp_current: + """ + Warp the food-enhanced current map from 100m to the target pixel scale + (5 arc-seconds, ~1.67km at the equator). + + PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. + """ + input: + sentinel=ancient(DATADIR / "100m" / "current" / ".sentinel"), output: - rasters=DATADIR / "food" / "current_raw", + sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", + params: + input_dir=DATADIR / "100m" / "current", + output_dir=DATADIR / "habitat_layers" / "current", + pixel_scale=config["pixel_scale"], threads: workflow.cores - script: - str(SRCDIR / "prepare_layers" / "make_food_current_map.py") + log: + DATADIR / "logs" / "warp_current.log", + shell: + """ + mkdir -p {params.output_dir} + for d in {params.input_dir}/*.tif; do + basename=$(basename "$d") + gdalwarp \ + -t_srs EPSG:4326 \ + -tr {params.pixel_scale} -{params.pixel_scale} \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS={threads} \ + -wo NUM_THREADS={threads} \ + "$d" \ + {params.output_dir}/"$basename" \ + 2>&1 + done 2>&1 | tee {log} + touch {output.sentinel} + """ + + +# ============================================================================= +# Process PNV map to habitat_layers (PRECIOUS) +# ============================================================================= + +rule pnv_processed: + """ + Process the PNV map into per-class fractional rasters at the target pixel scale + using aoh-habitat-process. + + PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. + """ + input: + pnv=ancient(DATADIR / "habitat" / "pnv_raw.tif"), + output: + sentinel=DATADIR / "habitat_layers" / "pnv" / ".sentinel", + params: + output_dir=DATADIR / "habitat_layers" / "pnv", + pixel_scale=config["pixel_scale"], + log: + DATADIR / "logs" / "pnv_processed.log", + shell: + """ + set -e + aoh-habitat-process \ + --habitat {input.pnv} \ + --scale {params.pixel_scale} \ + --output {params.output_dir} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index e389d27..ac4b2d1 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -1,5 +1,15 @@ -# LIFE Pipeline - Prepare base layers +# LIFE Pipeline - Prepare base layers # ===================================== +# +# Handles: +# - Crosswalk table generation +# - Jung L2 habitat map download +# - Jung habitat updates download +# - Jung PNV map download +# - Elevation download and warp (precious) + +import os +from pathlib import Path # ============================================================================= @@ -22,7 +32,7 @@ rule convert_crosswalk: rule jung_habitat_map: """ - Fetch the Jung habitat map used as the current map. + Fetch the Jung L2 habitat map from Zenodo. """ output: habitat=DATADIR / "habitat" / "jung_l2_raw.tif" @@ -33,6 +43,7 @@ rule jung_habitat_map: DATADIR / "logs" / "download_habitat.log", shell: """ + mkdir -p $(dirname {output.habitat}) reclaimer zenodo --zenodo_id {params.zenodo_id} \ --filename "{params.filename}" \ --extract \ @@ -40,46 +51,30 @@ rule jung_habitat_map: 2>&1 | tee {log} """ + rule jung_habitat_updates: """ - Fetch the Jung habitat map updates. + Fetch the Jung habitat map update masks from Zenodo. """ output: sentinel=DATADIR / "habitat" / ".downloaded_updates" params: habitat_dir=DATADIR / "habitat", - zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], - filename=config["zenodo"]["jung_habitat"]["filename"], + zenodo_id=config["zenodo"]["jung_habitat_updates"]["zenodo_id"], + filename=config["zenodo"]["jung_habitat_updates"]["filename"], log: - DATADIR / "logs" / "download_habitat_update.log", + DATADIR / "logs" / "download_habitat_updates.log", shell: """ + mkdir -p {params.habitat_dir} reclaimer zenodo --zenodo_id {params.zenodo_id} \ --filename "{params.filename}" \ --extract \ --output {params.habitat_dir} \ 2>&1 | tee {log} - touch {output.sentinel} """ -rule current_raws: - """ - Build the LIFE current map, which is Jung with updates applied - and restricted to L1 to match the PNV map restrictions. - """ - input: - updates_sentinel=DATADIR / "habitat" / ".downloaded_updates", - habitat=DATADIR / "habitat" / "jung_l2_raw.tif", - crosswalk=DATADIR / "crosswalk.csv", - params: - updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", - output_dir=DATADIR / "habitat" / "current_raw", - output: - sentinel=DATADIR / "habitat" / "current_raw" / ".current_raw" - threads: workflow.cores - script: - str(SRCDIR / "prepare_layers" / "make_current_map.py") # ============================================================================= # Jung PNV map @@ -87,7 +82,7 @@ rule current_raws: rule jung_pnv_map: """ - Fetch the Jung PNV map from zenodo. + Fetch the Jung PNV map from Zenodo. """ output: habitat=DATADIR / "habitat" / "pnv_raw.tif" @@ -98,9 +93,90 @@ rule jung_pnv_map: DATADIR / "logs" / "download_pnv.log", shell: """ + mkdir -p $(dirname {output.habitat}) reclaimer zenodo --zenodo_id {params.zenodo_id} \ --filename "{params.filename}" \ --extract \ --output {output.habitat} \ 2>&1 | tee {log} """ + + +# ============================================================================= +# Elevation layers (precious — only rebuild if explicitly deleted) +# ============================================================================= + +rule download_elevation: + """ + Download raw elevation DEM from Zenodo. + """ + output: + elevation=DATADIR / "elevation.tif", + params: + zenodo_id=config["zenodo"]["elevation"]["zenodo_id"], + filename=config["zenodo"]["elevation"]["filename"], + log: + DATADIR / "logs" / "download_elevation.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --output {output.elevation} \ + 2>&1 | tee {log} + """ + + +rule elevation_max: + """ + Warp elevation to target projection and scale using max resampling. + Precious: only runs if output doesn't exist. + """ + input: + elevation=ancient(DATADIR / "elevation.tif"), + output: + elevation_max=DATADIR / "elevation-max.tif", + params: + pixel_scale=config["pixel_scale"], + threads: workflow.cores + log: + DATADIR / "logs" / "elevation_max.log", + shell: + """ + gdalwarp \ + -t_srs EPSG:4326 \ + -tr {params.pixel_scale} -{params.pixel_scale} \ + -r max \ + -co COMPRESS=LZW \ + -wo NUM_THREADS={threads} \ + {input.elevation} \ + {output.elevation_max} \ + 2>&1 | tee {log} + """ + + +rule elevation_min: + """ + Warp elevation to target projection and scale using min resampling. + Precious: only runs if output doesn't exist. + """ + input: + elevation=ancient(DATADIR / "elevation.tif"), + output: + elevation_min=DATADIR / "elevation-min.tif", + params: + pixel_scale=config["pixel_scale"], + threads: workflow.cores + log: + DATADIR / "logs" / "elevation_min.log", + shell: + """ + gdalwarp \ + -t_srs EPSG:4326 \ + -tr {params.pixel_scale} -{params.pixel_scale} \ + -r min \ + -co COMPRESS=LZW \ + -wo NUM_THREADS={threads} \ + {input.elevation} \ + {output.elevation_min} \ + 2>&1 | tee {log} + """ From 83557c672e7c361cbf54f377c12e9af68fbac9f7 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 17 Apr 2026 09:45:46 +0100 Subject: [PATCH 14/27] Add more snakemake files --- workflow/profiles/default/config.yaml | 2 + workflow/rules/aoh.smk | 163 ++++++++++++++++++++ workflow/rules/delta_p.smk | 181 ++++++++++++++++++++++ workflow/rules/scenario.smk | 209 ++++++++++++++++++++++++++ workflow/rules/species.smk | 53 +++++++ workflow/rules/validation.smk | 157 +++++++++++++++++++ 6 files changed, 765 insertions(+) create mode 100644 workflow/profiles/default/config.yaml create mode 100644 workflow/rules/aoh.smk create mode 100644 workflow/rules/delta_p.smk create mode 100644 workflow/rules/scenario.smk create mode 100644 workflow/rules/species.smk create mode 100644 workflow/rules/validation.smk diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml new file mode 100644 index 0000000..238b1e0 --- /dev/null +++ b/workflow/profiles/default/config.yaml @@ -0,0 +1,2 @@ +resources: + db_connections: 1 diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk new file mode 100644 index 0000000..6e60a40 --- /dev/null +++ b/workflow/rules/aoh.smk @@ -0,0 +1,163 @@ +# LIFE Pipeline - Area of Habitat (AoH) Generation Rules +# ======================================================= +# +# Generates AoH rasters for each species across all scenarios. +# Scenarios: current, pnv, and user-defined scenarios (arable, restore, ...) +# +# For current/scenario AOHs: uses species-info/{taxa}/current/ geojsons +# For pnv AOHs: uses species-info/{taxa}/historic/ geojsons +# +# Code-sensitive: rebuilds if the aoh package version changes. + +import os +from pathlib import Path + + +# ============================================================================= +# Version Sentinel for AOH Code +# ============================================================================= + + +rule aoh_version_sentinel: + """ + Track the aoh package version. AOH rules depend on this to trigger + rebuilds when the package updates. + """ + output: + sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + run: + import subprocess + os.makedirs(os.path.dirname(output.sentinel), exist_ok=True) + try: + result = subprocess.run( + ["aoh-calc", "--version"], + capture_output=True, + text=True, + check=True, + ) + aoh_version = result.stdout.strip() + except Exception: + aoh_version = "unknown" + with open(output.sentinel, "w") as f: + f.write(f"aoh: {aoh_version}\n") + + +# ============================================================================= +# Per-Species AOH Generation +# ============================================================================= + + +def aoh_species_inputs(wildcards): + """Return inputs for generate_aoh.""" + era = "historic" if wildcards.scenario == "pnv" else "current" + return { + "species_data": DATADIR + / "species-info" + / wildcards.taxa + / era + / f"{wildcards.species_id}.geojson", + "habitat_sentinel": ancient( + DATADIR / "habitat_layers" / wildcards.scenario / ".sentinel" + ), + "elevation_max": ancient(DATADIR / "elevation-max.tif"), + "elevation_min": ancient(DATADIR / "elevation-min.tif"), + "crosswalk": DATADIR / "crosswalk.csv", + "version_sentinel": DATADIR / ".sentinels" / "aoh_version.txt", + } + + +rule generate_aoh: + """ + Generate Area of Habitat raster for a single species in a given scenario. + + Parallelizable: run with `snakemake --cores N` to process multiple species + concurrently. Uses --force-habitat and --pixel-area flags (no mask). + + For current/scenario AOHs: uses current/ era species data. + For pnv AOHs: uses historic/ era species data. + """ + input: + unpack(aoh_species_inputs), + output: + metadata=DATADIR / "aohs" / "{scenario}" / "{taxa}" / "aoh_{species_id}.json", + params: + habitat_dir=lambda wildcards: DATADIR / "habitat_layers" / wildcards.scenario, + output_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, + log: + DATADIR / "logs" / "aoh" / "{scenario}" / "{taxa}" / "{species_id}.log", + resources: + aoh_slots=1, + shell: + """ + mkdir -p $(dirname {log}) + mkdir -p {params.output_dir} + aoh-calc \ + --fractional_habitats {params.habitat_dir} \ + --elevation-max {input.elevation_max} \ + --elevation-min {input.elevation_min} \ + --crosswalk {input.crosswalk} \ + --speciesdata {input.species_data} \ + --output {params.output_dir} \ + --force-habitat \ + --pixel-area \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Per-Taxa AOH Aggregation (Checkpoint) +# ============================================================================= + + +checkpoint aggregate_aohs_per_taxa: + """ + Checkpoint that ensures all AOHs for a taxa/scenario are generated. + Creates a sentinel file when complete. + + This is a checkpoint so downstream rules (delta P) can re-evaluate the DAG + after AOHs are created. + """ + input: + metadata=get_all_aoh_metadata_for_taxa_scenario, + version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + output: + sentinel=DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete", + shell: + """ + echo "Generated $(echo {input.metadata} | wc -w) AOHs for {wildcards.taxa}/{wildcards.scenario}" + touch {output.sentinel} + """ + + +# ============================================================================= +# Collate AOH Data (per scenario) +# ============================================================================= + + +rule collate_aoh_data: + """ + Collate metadata from all AOH JSON files for a scenario into a single CSV. + + Used by validation (current scenario) and for downstream analysis. + """ + input: + sentinels=lambda wildcards: expand( + str(DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete"), + scenario=wildcards.scenario, + taxa=TAXA, + ), + version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + output: + collated=DATADIR / "aohs" / "{scenario}.csv", + params: + aoh_results_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario, + log: + DATADIR / "logs" / "collate_aoh_{scenario}.log", + shell: + """ + mkdir -p $(dirname {output.collated}) + aoh-collate-data \ + --aoh_results {params.aoh_results_dir} \ + --output {output.collated} \ + 2>&1 | tee {log} + """ diff --git a/workflow/rules/delta_p.smk b/workflow/rules/delta_p.smk new file mode 100644 index 0000000..cae4964 --- /dev/null +++ b/workflow/rules/delta_p.smk @@ -0,0 +1,181 @@ +# LIFE Pipeline - Delta P (Persistence) Rules +# ============================================ +# +# Calculates per-species change in probability of persistence (Delta P) +# for each user-defined scenario, then aggregates to produce final maps. +# +# Pipeline per scenario: +# 1. calculate_delta_p: per species, uses current + scenario + pnv AOHs +# 2. aggregate_delta_p_per_taxa: sentinel that all species are done +# 3. raster_sum_per_taxa: sum per-species delta P values per taxa +# 4. species_totals: count species per taxa for normalisation +# 5. delta_p_scaled: final scaled output map + + +import os +from pathlib import Path + + +# ============================================================================= +# Per-Species Delta P Calculation +# ============================================================================= + + +rule calculate_delta_p: + """ + Calculate the change in probability of persistence for a single species + under a given scenario. + + Uses current, scenario, and PNV (historic) AOH rasters. + RESIDENT species produce one TIF; NONBREEDING species produce one TIF + (which covers the migratory pair); BREEDING species produce no output. + + A sentinel is always created to track completion regardless of season. + """ + input: + current_sentinel=DATADIR / "aohs" / "current" / "{taxa}" / ".complete", + scenario_sentinel=DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete", + pnv_sentinel=DATADIR / "aohs" / "pnv" / "{taxa}" / ".complete", + output: + sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".{species_id}.done", + params: + current_path=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, + scenario_path=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, + pnv_path=lambda wildcards: DATADIR / "aohs" / "pnv" / wildcards.taxa, + output_path=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa, + curve=CURVE, + log: + DATADIR / "logs" / "deltap" / "{scenario}" / "{taxa}" / "{species_id}.log", + shell: + """ + mkdir -p $(dirname {log}) + mkdir -p {params.output_path} + taxid=$(echo "{wildcards.species_id}" | cut -d_ -f1) + season=$(echo "{wildcards.species_id}" | cut -d_ -f2) + python3 {SRCDIR}/deltap/global_code_residents_pixel.py \ + --taxid "$taxid" \ + --season "$season" \ + --current_path {params.current_path} \ + --scenario_path {params.scenario_path} \ + --historic_path {params.pnv_path} \ + --output_path {params.output_path} \ + --z {params.curve} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ + + +# ============================================================================= +# Per-Taxa Delta P Aggregation +# ============================================================================= + + +rule aggregate_delta_p_per_taxa: + """ + Wait for all per-species delta P calculations for a taxa to complete. + Creates a sentinel that downstream rules depend on. + """ + input: + sentinels=get_delta_p_sentinels_for_taxa_scenario, + output: + sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".complete", + shell: + """ + touch {output.sentinel} + """ + + +# ============================================================================= +# Per-Taxa Raster Sum +# ============================================================================= + + +rule raster_sum_per_taxa: + """ + Sum all per-species delta P rasters for a taxa into a single raster. + """ + input: + sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".complete", + output: + DATADIR / "deltap_sum" / "{scenario}" / CURVE / "{taxa}.tif", + params: + rasters_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa, + threads: workflow.cores + log: + DATADIR / "logs" / "raster_sum" / "{scenario}" / "{taxa}.log", + shell: + """ + mkdir -p $(dirname {output}) + mkdir -p $(dirname {log}) + python3 {SRCDIR}/utils/raster_sum.py \ + --rasters_directory {params.rasters_dir} \ + --output {output} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Species Totals +# ============================================================================= + + +rule species_totals: + """ + Count the number of species per taxa used in the delta P calculation. + Used by delta_p_scaled for normalisation. + """ + input: + sentinels=expand( + str(DATADIR / "deltap" / "{{scenario}}" / CURVE / "{taxa}" / ".complete"), + taxa=TAXA, + ), + output: + totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", + params: + deltaps_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE, + log: + DATADIR / "logs" / "species_totals_{scenario}.log", + shell: + """ + python3 {SRCDIR}/utils/species_totals.py \ + --deltaps {params.deltaps_dir} \ + --output {output.totals} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Final Scaled Delta P Map +# ============================================================================= + + +rule delta_p_scaled: + """ + Generate the final scaled delta P map for a scenario. + + Combines per-taxa delta P sums with the habitat difference map and + species totals to produce the final normalised LIFE output. + """ + input: + taxa_rasters=expand( + str(DATADIR / "deltap_sum" / "{{scenario}}" / CURVE / "{taxa}.tif"), + taxa=TAXA, + ), + diffmap=DATADIR / "habitat" / "{scenario}_diff_area.tif", + totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", + output: + DATADIR / "deltap_final" / f"scaled_{{scenario}}_{CURVE}.tif", + params: + input_dir=lambda wildcards: DATADIR / "deltap_sum" / wildcards.scenario / CURVE, + log: + DATADIR / "logs" / "delta_p_scaled_{scenario}.log", + shell: + """ + mkdir -p $(dirname {output}) + python3 {SRCDIR}/deltap/delta_p_scaled.py \ + --input {params.input_dir} \ + --diffmap {input.diffmap} \ + --totals {input.totals} \ + --output {output} \ + 2>&1 | tee {log} + """ diff --git a/workflow/rules/scenario.smk b/workflow/rules/scenario.smk new file mode 100644 index 0000000..1d0f86e --- /dev/null +++ b/workflow/rules/scenario.smk @@ -0,0 +1,209 @@ +# LIFE Pipeline - Scenario map rules +# ===================================== +# +# Handles generating scenario-specific habitat maps and difference maps: +# +# - Arable: all non-urban land converted to arable +# - Restore: agricultural land restored to PNV +# +# Each scenario: +# 1. Generate 100m scenario map from current map +# 2. Warp to habitat_layers/{scenario}/ at target pixel scale (PRECIOUS) +# 3. Generate diff map comparing current vs scenario + + +import os +from pathlib import Path + + +# ============================================================================= +# Arable scenario +# ============================================================================= + +rule make_arable_map: + """ + Generate the arable scenario map at 100m resolution. + All non-urban land is converted to arable. + """ + input: + current_sentinel=DATADIR / "100m" / "current" / ".sentinel", + output: + sentinel=DATADIR / "100m" / "arable" / ".sentinel", + params: + current_dir=DATADIR / "100m" / "current", + output_dir=DATADIR / "100m" / "arable", + threads: workflow.cores + log: + DATADIR / "logs" / "make_arable_map.log", + shell: + """ + python3 {SRCDIR}/prepare_layers/make_arable_map.py \ + --current {params.current_dir} \ + --output {params.output_dir} \ + -j {threads} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ + + +rule warp_arable: + """ + Warp the arable scenario map from 100m to the target pixel scale. + PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. + """ + input: + sentinel=ancient(DATADIR / "100m" / "arable" / ".sentinel"), + output: + sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", + params: + input_dir=DATADIR / "100m" / "arable", + output_dir=DATADIR / "habitat_layers" / "arable", + pixel_scale=config["pixel_scale"], + threads: workflow.cores + log: + DATADIR / "logs" / "warp_arable.log", + shell: + """ + mkdir -p {params.output_dir} + for d in {params.input_dir}/*.tif; do + basename=$(basename "$d") + gdalwarp \ + -t_srs EPSG:4326 \ + -tr {params.pixel_scale} -{params.pixel_scale} \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS={threads} \ + -wo NUM_THREADS={threads} \ + "$d" \ + {params.output_dir}/"$basename" \ + 2>&1 + done 2>&1 | tee {log} + touch {output.sentinel} + """ + + +rule diff_map_arable: + """ + Generate the area difference map between current and arable habitat layers. + Used by the delta P scaling step. + """ + input: + current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", + arable_sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", + output: + DATADIR / "habitat" / "arable_diff_area.tif", + params: + current_dir=DATADIR / "habitat_layers" / "current", + arable_dir=DATADIR / "habitat_layers" / "arable", + threads: workflow.cores + log: + DATADIR / "logs" / "diff_map_arable.log", + shell: + """ + mkdir -p $(dirname {output}) + python3 {SRCDIR}/prepare_layers/make_diff_map.py \ + --current {params.current_dir} \ + --scenario {params.arable_dir} \ + --output {output} \ + -j {threads} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Restore scenario +# ============================================================================= + +rule make_restore_map: + """ + Generate the restore scenario map at 100m resolution. + Agricultural land is restored to PNV habitat. + """ + input: + current_sentinel=DATADIR / "100m" / "current" / ".sentinel", + pnv=DATADIR / "habitat" / "pnv_raw.tif", + crosswalk=DATADIR / "crosswalk.csv", + output: + sentinel=DATADIR / "100m" / "restore" / ".sentinel", + params: + current_dir=DATADIR / "100m" / "current", + output_dir=DATADIR / "100m" / "restore", + threads: workflow.cores + log: + DATADIR / "logs" / "make_restore_map.log", + shell: + """ + python3 {SRCDIR}/prepare_layers/make_restore_map.py \ + --pnv {input.pnv} \ + --current {params.current_dir} \ + --crosswalk {input.crosswalk} \ + --output {params.output_dir} \ + -j {threads} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ + + +rule warp_restore: + """ + Warp the restore scenario map from 100m to the target pixel scale. + PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. + """ + input: + sentinel=ancient(DATADIR / "100m" / "restore" / ".sentinel"), + output: + sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", + params: + input_dir=DATADIR / "100m" / "restore", + output_dir=DATADIR / "habitat_layers" / "restore", + pixel_scale=config["pixel_scale"], + threads: workflow.cores + log: + DATADIR / "logs" / "warp_restore.log", + shell: + """ + mkdir -p {params.output_dir} + for d in {params.input_dir}/*.tif; do + basename=$(basename "$d") + gdalwarp \ + -t_srs EPSG:4326 \ + -tr {params.pixel_scale} -{params.pixel_scale} \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS={threads} \ + -wo NUM_THREADS={threads} \ + "$d" \ + {params.output_dir}/"$basename" \ + 2>&1 + done 2>&1 | tee {log} + touch {output.sentinel} + """ + + +rule diff_map_restore: + """ + Generate the area difference map between current and restore habitat layers. + """ + input: + current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", + restore_sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", + output: + DATADIR / "habitat" / "restore_diff_area.tif", + params: + current_dir=DATADIR / "habitat_layers" / "current", + restore_dir=DATADIR / "habitat_layers" / "restore", + threads: workflow.cores + log: + DATADIR / "logs" / "diff_map_restore.log", + shell: + """ + mkdir -p $(dirname {output}) + python3 {SRCDIR}/prepare_layers/make_diff_map.py \ + --current {params.current_dir} \ + --scenario {params.restore_dir} \ + --output {output} \ + -j {threads} \ + 2>&1 | tee {log} + """ diff --git a/workflow/rules/species.smk b/workflow/rules/species.smk new file mode 100644 index 0000000..1afaa91 --- /dev/null +++ b/workflow/rules/species.smk @@ -0,0 +1,53 @@ +# LIFE Pipeline - Species Data Extraction Rules +# ============================================== +# +# Extracts species data from the IUCN PostgreSQL database. +# Creates both current/ (presence codes 1,2) and historic/ (presence codes 1,2,4,5) +# subdirectories, each with per-species GeoJSON files and a report.csv. +# +# Environment variables required: +# DB_HOST, DB_NAME, DB_USER, DB_PASSWORD + + +# ============================================================================= +# Species Data Extraction (Checkpoint) +# ============================================================================= + + +checkpoint extract_species_data: + """ + Extract species data from PostgreSQL database. + + This is a checkpoint because the number of output GeoJSON files is only + known after extraction. Each taxa produces N species files in both current/ + and historic/ subdirectories. + + Environment variables required: + DB_HOST, DB_NAME, DB_USER, DB_PASSWORD + """ + output: + current_report=DATADIR / "species-info" / "{taxa}" / "current" / "report.csv", + historic_report=DATADIR / "species-info" / "{taxa}" / "historic" / "report.csv", + params: + classname="{taxa}", + output_dir=lambda wildcards: DATADIR / "species-info" / wildcards.taxa, + projection=config["projection"], + overrides=lambda wildcards: DATADIR / config["optional_inputs"]["species_overrides"], + resources: + db_connections=1, + threads: workflow.cores + log: + DATADIR / "logs" / "extract_species_{taxa}.log", + shell: + """ + OVERRIDES_ARG="" + if [ -f "{params.overrides}" ]; then + OVERRIDES_ARG="--overrides {params.overrides}" + fi + python3 {SRCDIR}/prepare_species/extract_species_psql.py \ + --class {wildcards.taxa} \ + --output {params.output_dir} \ + --projection "{params.projection}" \ + $OVERRIDES_ARG \ + 2>&1 | tee {log} + """ diff --git a/workflow/rules/validation.smk b/workflow/rules/validation.smk new file mode 100644 index 0000000..bc5ea90 --- /dev/null +++ b/workflow/rules/validation.smk @@ -0,0 +1,157 @@ +# LIFE Pipeline - Validation Rules +# ================================= +# +# Validates AOH models for the "current" scenario only: +# +# 1. Model validation: Statistical analysis (requires R) +# 2. GBIF occurrence validation: Expensive, explicit-only + +# ============================================================================= +# Model Validation +# ============================================================================= + + +rule model_validation: + """ + Perform statistical validation of AOH models (Dahal et al. methodology). + Runs on the "current" scenario AOHs only. + """ + input: + collated=DATADIR / "aohs" / "current.csv", + version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + output: + validation=DATADIR / "validation" / "model_validation.csv", + log: + DATADIR / "logs" / "model_validation.log", + shell: + """ + mkdir -p $(dirname {output.validation}) + aoh-validate-prevalence \ + --collated_aoh_data {input.collated} \ + --output {output.validation} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Species Richness and Endemism (current scenario) +# ============================================================================= + + +rule species_richness: + """ + Calculate species richness from current AOH rasters. + NOT included in 'all' — use the 'summaries' target explicitly. + """ + input: + aoh_sentinel=DATADIR / "aohs" / "current.csv", + version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + output: + richness=DATADIR / "summaries" / "species_richness.tif", + params: + aohs_folder=DATADIR / "aohs" / "current", + threads: workflow.cores + log: + DATADIR / "logs" / "species_richness.log", + shell: + """ + mkdir -p $(dirname {output.richness}) + aoh-species-richness \ + --aohs_folder {params.aohs_folder} \ + --output {output.richness} \ + 2>&1 | tee {log} + """ + + +rule endemism: + """ + Calculate endemism from current AOH rasters. + NOT included in 'all' — use the 'summaries' target explicitly. + """ + input: + aoh_sentinel=DATADIR / "aohs" / "current.csv", + species_richness=DATADIR / "summaries" / "species_richness.tif", + version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", + output: + endemism=DATADIR / "summaries" / "endemism.tif", + params: + aohs_folder=DATADIR / "aohs" / "current", + threads: workflow.cores + log: + DATADIR / "logs" / "endemism.log", + shell: + """ + aoh-endemism \ + --aohs_folder {params.aohs_folder} \ + --species_richness {input.species_richness} \ + --output {output.endemism} \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Occurrence Validation (EXPENSIVE!) +# ============================================================================= + + +rule fetch_gbif_data: + """ + Fetch GBIF occurrence data for a taxa. + Expensive (hours) — only runs if output doesn't exist. + + Environment variables required: + GBIF_USERNAME, GBIF_EMAIL, GBIF_PASSWORD + """ + input: + collated=ancient(DATADIR / "aohs" / "current.csv"), + output: + sentinel=DATADIR / "validation" / "occurrences" / ".{taxa}_fetched", + params: + output_dir=DATADIR / "validation" / "occurrences", + log: + DATADIR / "logs" / "fetch_gbif_{taxa}.log", + shell: + """ + mkdir -p {params.output_dir} + aoh-fetch-gbif-data \ + --collated_aoh_data {input.collated} \ + --taxa {wildcards.taxa} \ + --output_dir {params.output_dir} \ + 2>&1 | tee {log} + touch {output.sentinel} + """ + + +rule validate_gbif_occurrences: + """ + Validate current AOH models against GBIF occurrence data. + """ + input: + gbif_sentinel=DATADIR / "validation" / "occurrences" / ".{taxa}_fetched", + aoh_sentinel=ancient(DATADIR / "aohs" / "current" / "{taxa}" / ".complete"), + output: + validation=DATADIR / "validation" / "occurrences" / "{taxa}.csv", + params: + gbif_data=lambda wildcards: DATADIR / "validation" / "occurrences" / wildcards.taxa, + species_data=lambda wildcards: DATADIR / "species-info" / wildcards.taxa / "current", + aoh_results=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, + log: + DATADIR / "logs" / "validate_gbif_{taxa}.log", + shell: + """ + aoh-validate-occurrences \ + --gbif_data_path {params.gbif_data} \ + --species_data {params.species_data} \ + --aoh_results {params.aoh_results} \ + --output {output.validation} \ + 2>&1 | tee {log} + """ + + +rule occurrence_validation: + """ + Target rule for GBIF validation for all taxa. + WARNING: Expensive. Only run explicitly: snakemake occurrence_validation + """ + input: + expand(str(DATADIR / "validation" / "occurrences" / "{taxa}.csv"), taxa=TAXA), From 10ee8448e04a9faa6db3f6f8ee0d285460f13662 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 17 Apr 2026 09:47:37 +0100 Subject: [PATCH 15/27] Lock down aoh and yirgacheffe dependancies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index eed4e1c..1d91e7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,8 @@ rasterio requests alive-progress matplotlib -yirgacheffe>=1.12.7 -aoh>=2.0.5 +yirgacheffe>=1.12.7,<2.0 +aoh>=2.1.2,<3.0 gdal[numpy] git+https://github.com/quantifyearth/iucn_modlib.git git+https://github.com/quantifyearth/seasonality @@ -24,7 +24,7 @@ git+https://github.com/quantifyearth/seasonality # Snakemake workflow management snakemake>=8.0 # We require a bug fix on this package that isn't yet upstream -snakemake-argparse-bridge @ git+https://github.com/mdales/snakemake-argparse-bridge@77ac272d0bc22c370aeb9d1670882fd9dac9b098 +snakemake-argparse-bridge # developer requirements pytest From 8fc5f41f69d8837ceabe7c370cce5ed984fc5272 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 24 Apr 2026 10:04:40 +0100 Subject: [PATCH 16/27] First end to end with snakemake --- .github/workflows/python-package.yml | 17 ++-- deltap/delta_p_scaled.py | 7 ++ deltap/global_code_residents_pixel.py | 95 ++++++++++++++------- prepare_layers/make_current_map.py | 30 ++++--- prepare_layers/make_food_current_map.py | 21 ++++- prepare_species/common.py | 2 +- requirements.txt | 2 +- utils/raster_sum.py | 2 +- utils/species_totals.py | 5 ++ workflow/Snakefile | 7 +- workflow/rules/aoh.smk | 4 +- workflow/rules/delta_p.smk | 105 ++++++------------------ workflow/rules/habitat.smk | 55 +++++++------ workflow/rules/prepare.smk | 2 +- workflow/rules/scenario.smk | 4 +- 15 files changed, 193 insertions(+), 165 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 21e53da..2877a6d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,7 +12,7 @@ on: jobs: build: runs-on: ubuntu-latest - container: ghcr.io/osgeo/gdal:ubuntu-small-3.11.0 + container: ghcr.io/osgeo/gdal:ubuntu-small-3.12.2 strategy: fail-fast: false matrix: @@ -23,28 +23,33 @@ jobs: run: | apt-get update -qqy apt-get install -y git python3-pip libpq5 libpq-dev shellcheck + - uses: actions/checkout@v4 with: submodules: 'true' + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install gdal[numpy]==3.11.0 + python -m pip install gdal[numpy]==3.12.2 python -m pip install -r requirements.txt + - name: Lint with pylint run: | python3 -m pylint deltap prepare_layers prepare_species usecases utils local + - name: Type checking run: | python3 -m mypy deltap prepare_layers prepare_species usecases utils local + - name: Tests run: | python3 -m pytest ./tests - - name: Script checks - run: | - shellcheck ./scripts/run.sh - shellcheck ./scripts/generate_food_map.sh + + - name: Snakemake format check + run: snakefmt --check workflow/ diff --git a/deltap/delta_p_scaled.py b/deltap/delta_p_scaled.py index bf981ba..3925e93 100644 --- a/deltap/delta_p_scaled.py +++ b/deltap/delta_p_scaled.py @@ -5,6 +5,7 @@ import pandas as pd import yirgacheffe as yg +from snakemake_argparse_bridge import snakemake_compatible # type: ignore SCALE = 1e6 @@ -56,6 +57,12 @@ def delta_p_scaled_area( yg.to_geotiff(output_path, bands, labels, nodata=float('nan')) +@snakemake_compatible(mapping={ + "input_path": "params.input_dir", + "diff_area_map_path": "input.diffmap", + "totals_path": "input.totals", + "output_path": "output.final", +}) def main() -> None: parser = argparse.ArgumentParser(description="Scale final results for publication.") parser.add_argument( diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py index 5d5f5da..bbae6f6 100644 --- a/deltap/global_code_residents_pixel.py +++ b/deltap/global_code_residents_pixel.py @@ -5,7 +5,10 @@ import sys from pathlib import Path -import yirgacheffe as yg +from snakemake_argparse_bridge import snakemake_compatible # type: ignore + +os.environ['YIRGACHEFFE_BACKEND'] = 'NUMPY' +import yirgacheffe as yg # pylint: disable=C0413 # This isn't a hard requirement, but in practice most experiments use 0.25, and the original # paper used the other three values for comparison. Other values are valid, but to save wasted @@ -69,23 +72,29 @@ def process_delta_p( return new_p def global_code_residents_pixel_ae( - taxid: int, + taxid: str, season: str, current_aohs_path: Path, scenario_aohs_path: Path, historic_aohs_path: Path, exponent: str | float, - output_folder: Path, + output_path: Path, ) -> None: - os.makedirs(output_folder, exist_ok=True) + os.makedirs(output_path.parent, exist_ok=True) + + # snakemake demands we write a file to show we've done something, even if there + # is no tiff generated + sentinel_path = output_path.parent / f".{taxid}_{season}.done" match season: case "RESIDENT": - filename = f"{taxid}_{season}.tif" + filename = f"aoh_{taxid}_{season}.tif" try: current, current_aoh = open_layer(current_aohs_path / filename) except FileNotFoundError: - sys.exit(f"Failed to open current layer {current_aohs_path / filename}") + print(f"Failed to open current layer {current_aohs_path / filename}", file=sys.stderr) + sentinel_path.touch() + return try: scenario: yg.YirgacheffeLayer | float @@ -97,10 +106,14 @@ def global_code_residents_pixel_ae( try: _, historic_aoh = open_layer(historic_aohs_path / filename) except FileNotFoundError: - sys.exit(f"Failed to open historic layer {historic_aohs_path / filename}") + print(f"Failed to open historic layer {historic_aohs_path / filename}", file=sys.stderr) + sentinel_path.touch() + return if historic_aoh == 0.0: - sys.exit(f"Historic AoH for {taxid} is zero, aborting") + print(f"Historic AoH for {taxid} is zero, skipping", file=sys.stderr) + sentinel_path.touch() + return old_persistence = calc_persistence_value(current_aoh, historic_aoh, exponent) @@ -118,27 +131,38 @@ def global_code_residents_pixel_ae( delta_p = new_p_layer - old_persistence try: - delta_p.to_geotiff(output_folder / filename) + delta_p.to_geotiff(output_path) except ValueError: - sys.exit(f"Failed to align layers for {taxid}_{season}") + print(f"Failed to align layers for {taxid}_{season}", file=sys.stderr) + sentinel_path.touch() + return + sentinel_path.touch() case "NONBREEDING": - nonbreeding_filename = f"{taxid}_NONBREEDING.tif" - breeding_filename = f"{taxid}_BREEDING.tif" + nonbreeding_filename = f"aoh_{taxid}_NONBREEDING.tif" + breeding_filename = f"aoh_{taxid}_BREEDING.tif" try: _, historic_aoh_breeding = open_layer(historic_aohs_path / breeding_filename) if historic_aoh_breeding == 0.0: - sys.exit(f"Historic AoH breeding for {taxid} is zero, aborting") + print(f"Historic AoH breeding for {taxid} is zero, skipping", file=sys.stderr) + sentinel_path.touch() + return except FileNotFoundError: - sys.exit(f"Historic AoH for breeding {taxid} not found, aborting") + print(f"Historic AoH for breeding {taxid} not found, skipping", file=sys.stderr) + sentinel_path.touch() + return try: _, historic_aoh_non_breeding = open_layer(historic_aohs_path / nonbreeding_filename) if historic_aoh_non_breeding == 0.0: - sys.exit(f"Historic AoH for non breeding {taxid} is zero, aborting") + print(f"Historic AoH for non breeding {taxid} is zero, skipping", file=sys.stderr) + sentinel_path.touch() + return except FileNotFoundError: - sys.exit(f"Historic AoH for non breeding {taxid} not found, aborting") + print(f"Historic AoH for non breeding {taxid} not found, skipping", file=sys.stderr) + sentinel_path.touch() + return if scenario_aohs_path.name != "nan": non_breeding_scenario_path = scenario_aohs_path / nonbreeding_filename @@ -150,11 +174,16 @@ def global_code_residents_pixel_ae( try: current_breeding, current_aoh_breeding = open_layer(current_aohs_path / breeding_filename) except FileNotFoundError: - sys.exit(f"Failed to open current breeding {current_aohs_path / breeding_filename}") + print(f"Failed to open current breeding {current_aohs_path / breeding_filename}", file=sys.stderr) + sentinel_path.touch() + return try: current_non_breeding, current_aoh_non_breeding = open_layer(current_aohs_path / nonbreeding_filename) except FileNotFoundError: - sys.exit(f"Failed to open current non breeding {current_aohs_path / nonbreeding_filename}") + print(f"Failed to open current non breeding {current_aohs_path / nonbreeding_filename}", + file=sys.stderr) + sentinel_path.touch() + return try: scenario_breeding: yg.YirgacheffeLayer | float scenario_breeding, _ = open_layer(breeding_scenario_path) @@ -182,9 +211,8 @@ def global_code_residents_pixel_ae( old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) - # # In general Yirgacheffe can infer the behaviour needed for area intersections based on - # operator, but in this instance we want to force the caclulation to take place for the + # operator, but in this instance we want to force the calculation to take place for the # union of the areas involved. src_layers = [current_breeding, scenario_breeding, current_non_breeding, scenario_non_breeding] layers = [x for x in src_layers if isinstance(x, yg.YirgacheffeLayer)] @@ -192,8 +220,6 @@ def global_code_residents_pixel_ae( for layer in layers: layer.set_window_for_union(union) - - new_p_breeding = process_delta_p( current_breeding, scenario_breeding, @@ -213,14 +239,18 @@ def global_code_residents_pixel_ae( delta_p_layer = new_p_layer - old_persistence try: - delta_p_layer.to_geotiff(output_folder / nonbreeding_filename) + delta_p_layer.to_geotiff(output_path) except ValueError: - delta_p_layer.pretty_print() - sys.exit(f"Failed to align layers for {taxid}_{season}") + print(f"Failed to align layers for {taxid}_{season}", file=sys.stderr) + sentinel_path.touch() + return + sentinel_path.touch() case "BREEDING": - pass # covered by the nonbreeding case + # covered by the nonbreeding case + sentinel_path.touch() case _: + sentinel_path.touch() sys.exit(f"Unexpected season for species {taxid}: {season}") def exponent_type(value: str): @@ -234,11 +264,20 @@ def exponent_type(value: str): raise argparse.ArgumentTypeError(f"numeric exponent must be one of {sorted(FLOAT_EXPONENTS)}, got {f}") return f +@snakemake_compatible(mapping={ + "taxid": "params.taxon_id", + "season": "params.season", + "current_path": "params.current_path", + "historic_path": "params.pnv_path", + "scenario_path": "params.scenario_path", + "output_path": "params.output_tif", + "exponent": "params.curve", +}) def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( '--taxid', - type=int, + type=str, required=True, dest='taxid', help="Species taxon id", @@ -275,7 +314,7 @@ def main() -> None: type=Path, required=True, dest="output_path", - help="path to save output csv" + help="path to save output tif" ) parser.add_argument( '--z', diff --git a/prepare_layers/make_current_map.py b/prepare_layers/make_current_map.py index a650f61..7be9547 100644 --- a/prepare_layers/make_current_map.py +++ b/prepare_layers/make_current_map.py @@ -1,11 +1,11 @@ import argparse import itertools +import logging import math import os from contextlib import nullcontext from pathlib import Path from multiprocessing import set_start_method -from typing import Dict, List, Optional import pandas as pd import yirgacheffe as yg @@ -15,6 +15,10 @@ from osgeo import gdal # type: ignore gdal.SetCacheMax(1 * 1024 * 1024 * 1024) +logger = logging.getLogger(__name__) +logging.basicConfig() +logger.setLevel(logging.INFO) + # From Eyres et al: The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped # only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural # habitats mapped at level 1 and artificial habitats at level 2. @@ -22,9 +26,9 @@ "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6" ] -def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: +def load_crosswalk_table(table_file_name: Path) -> dict[str,list[int]]: rawdata = pd.read_csv(table_file_name) - result: Dict[str,List[int]] = {} + result: dict[str,list[int]] = {} for _, row in rawdata.iterrows(): try: result[row.code].append(int(row.value)) @@ -34,15 +38,18 @@ def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: def make_current_maps( jung_path: Path, - update_masks_path: Optional[Path], + update_masks_path: Path | None, crosswalk_path: Path, output_dir_path: Path, - parallelism: Optional[int], + parallelism: int | None, show_progress: bool, sentinel_path: Path | None, ) -> None: os.makedirs(output_dir_path, exist_ok=True) - print(f"Using {parallelism} workers") + if parallelism: + logger.info("Using %d workers", parallelism) + else: + logger.info("No parallelism specified") if update_masks_path is not None: update_masks = [ @@ -58,7 +65,7 @@ def make_current_maps( updated_jung = jung for update in update_masks: - updated_jung = yg.where(update != 0, update, updated_jung) + updated_jung = yg.where(update.isnan(), updated_jung, update) current_map = yg.where( updated_jung.isin(map_preserve_code), @@ -66,14 +73,15 @@ def make_current_maps( (yg.floor(updated_jung / 100) * 100), ) - print("Calculating unique land cover types...") + logger.info("Calculating unique land cover types...") vals = current_map.unique() + logger.info("Found %s land cover classes", set(vals)) for lcc in vals: # Seems there are some NaN values in Jung if math.isnan(lcc): continue - print(f"Processing {lcc}...") + logger.info("Processing %s...", lcc) per_class = current_map == lcc cast_per_class = per_class.astype(yg.DataType.Float32) ctx = alive_bar(manual=True, title=str(lcc)) if show_progress else nullcontext() @@ -95,7 +103,7 @@ def make_current_maps( "jung_path": "input.habitat", "update_masks_path": "params.updates_dir", "crosswalk_path": "input.crosswalk", - "concurrency": "threads", + "parallelism": "threads", "output_dir_path": "params.output_dir", "sentinel_path": "output.sentinel", }) @@ -134,7 +142,7 @@ def main() -> None: parser.add_argument( '-j', type=int, - help='Number of concurrent threads to use for calculation.', + help='Number of parallel threads to use for calculation.', required=False, default=None, dest='parallelism', diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 8036c06..85d709d 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -337,6 +337,7 @@ def make_food_current_map( pasture_adjustment_path: Path, output_path: Path, processes_count: int, + sentinel_path: Path | None, ) -> None: # We'll use a lot of processes which will talk back to the main process, so # we need to adjust the ulimit, which is quite low by default @@ -395,13 +396,18 @@ def make_food_current_map( processes.remove(candidate) time.sleep(0.1) + if sentinel_path: + sentinel_path.touch() + @snakemake_compatible(mapping={ - "current_lvl1_path": "input.jung", + "current_lvl1_path": "params.jung_dir", "pnv_path": "input.pnv", "crop_adjustment_path": "input.crop", "pasture_adjustment_path": "input.pasture", "processes_count": "threads", - "output_path": "output.rasters", + "output_path": "params.output_dir", + "sentinel_path": "output.sentinel", + "parallelism": "threads", }) def main() -> None: parser = argparse.ArgumentParser(description="Build the food current map") @@ -440,12 +446,18 @@ def main() -> None: required=True, dest='output_path', ) + parser.add_argument( + '--sentinel', + type=Path, + required=False, + dest='sentinel_path', + ) parser.add_argument( "-j", type=int, required=False, default=cpu_count() // 2, - dest="processes_count", + dest="parallelism", help="Number of concurrent threads to use." ) args = parser.parse_args() @@ -456,7 +468,8 @@ def main() -> None: args.crop_adjustment_path, args.pasture_adjustment_path, args.output_path, - args.processes_count, + args.parallelism, + args.sentinel_path, ) if __name__ == "__main__": diff --git a/prepare_species/common.py b/prepare_species/common.py index 6869d77..48a28f0 100644 --- a/prepare_species/common.py +++ b/prepare_species/common.py @@ -212,7 +212,7 @@ def tidy_reproject_save( graw = gdf.loc[0].copy() grow = aoh.tidy_data(graw) # type: ignore - output_path = output_directory_path / f"{grow.id_no}_{grow.season}.geojson" + output_path = output_directory_path / f"range_T{grow.id_no}A{grow.assessment_id}_{grow.season}.geojson" res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry") res_projected = res.to_crs(target_crs) report.filename = output_path diff --git a/requirements.txt b/requirements.txt index 1d91e7a..20c3fc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ alive-progress matplotlib yirgacheffe>=1.12.7,<2.0 aoh>=2.1.2,<3.0 -gdal[numpy] +gdal[numpy]==3.12.2 git+https://github.com/quantifyearth/iucn_modlib.git git+https://github.com/quantifyearth/seasonality diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 2b57ef6..c52469b 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -21,7 +21,7 @@ def raster_sum( total.to_geotiff(output_filename, callback=bar, parallelism=True) @snakemake_compatible(mapping={ - "rasters_directory": "input.rasters", + "rasters_directory": "params.rasters_dir", "output_filename": "output[0]", }) def main() -> None: diff --git a/utils/species_totals.py b/utils/species_totals.py index dfac10b..351b8f5 100644 --- a/utils/species_totals.py +++ b/utils/species_totals.py @@ -3,6 +3,7 @@ from typing import Dict import pandas as pd +from snakemake_argparse_bridge import snakemake_compatible # type: ignore TAXA = ["AMPHIBIA", "AVES", "MAMMALIA", "REPTILIA"] @@ -20,6 +21,10 @@ def species_totals( df.loc[-1] = ["all", df["count"].sum()] df.to_csv(output_path, index=False) +@snakemake_compatible(mapping={ + "deltaps_path": "params.deltaps_dir", + "output_filename": "output.totals", +}) def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( diff --git a/workflow/Snakefile b/workflow/Snakefile index 0780aef..6466d94 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -70,7 +70,7 @@ def get_species_ids_for_taxa_scenario(wildcards): else: report_path = Path(str(checkpoint_output.historic_report)) geojson_dir = report_path.parent - return [p.stem for p in sorted(geojson_dir.glob("*.geojson"))] + return [p.stem[6:] for p in sorted(geojson_dir.glob("*.geojson"))] def get_all_aoh_metadata_for_taxa_scenario(wildcards): @@ -92,8 +92,9 @@ def get_species_ids_for_delta_p(wildcards): def get_delta_p_sentinels_for_taxa_scenario(wildcards): - """Return all delta P sentinel files for a taxa and scenario.""" - species_ids = get_species_ids_for_delta_p(wildcards) + # Whatever logic you had in get_delta_p_sentinels_for_taxa_scenario, + # but returning .tif paths instead of sentinel paths + species_ids = get_species_ids_for_taxa_scenario(wildcards) return [ DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa / f".{sid}.done" for sid in species_ids diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk index 6e60a40..489fb0b 100644 --- a/workflow/rules/aoh.smk +++ b/workflow/rules/aoh.smk @@ -55,7 +55,7 @@ def aoh_species_inputs(wildcards): / "species-info" / wildcards.taxa / era - / f"{wildcards.species_id}.geojson", + / f"range_{wildcards.species_id}.geojson", "habitat_sentinel": ancient( DATADIR / "habitat_layers" / wildcards.scenario / ".sentinel" ), @@ -84,7 +84,7 @@ rule generate_aoh: habitat_dir=lambda wildcards: DATADIR / "habitat_layers" / wildcards.scenario, output_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, log: - DATADIR / "logs" / "aoh" / "{scenario}" / "{taxa}" / "{species_id}.log", + DATADIR / "logs" / "aoh" / "{scenario}" / "{taxa}" / "aoh_{species_id}.log", resources: aoh_slots=1, shell: diff --git a/workflow/rules/delta_p.smk b/workflow/rules/delta_p.smk index cae4964..c5a583b 100644 --- a/workflow/rules/delta_p.smk +++ b/workflow/rules/delta_p.smk @@ -14,7 +14,7 @@ import os from pathlib import Path - +from types import SimpleNamespace # ============================================================================= # Per-Species Delta P Calculation @@ -26,11 +26,8 @@ rule calculate_delta_p: Calculate the change in probability of persistence for a single species under a given scenario. - Uses current, scenario, and PNV (historic) AOH rasters. - RESIDENT species produce one TIF; NONBREEDING species produce one TIF - (which covers the migratory pair); BREEDING species produce no output. - - A sentinel is always created to track completion regardless of season. + species_id wildcard is of the form T{taxon_id}A{assessment_id}_{SEASON}, + e.g. T22685505A261477056_RESIDENT. """ input: current_sentinel=DATADIR / "aohs" / "current" / "{taxa}" / ".complete", @@ -42,82 +39,48 @@ rule calculate_delta_p: current_path=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, scenario_path=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, pnv_path=lambda wildcards: DATADIR / "aohs" / "pnv" / wildcards.taxa, - output_path=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa, + taxon_id=lambda wildcards: wildcards.species_id.rsplit("_", 1)[0], + season=lambda wildcards: wildcards.species_id.rsplit("_", 1)[1], curve=CURVE, + output_tif=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa / f"deltap_{wildcards.species_id}.tif", log: DATADIR / "logs" / "deltap" / "{scenario}" / "{taxa}" / "{species_id}.log", - shell: - """ - mkdir -p $(dirname {log}) - mkdir -p {params.output_path} - taxid=$(echo "{wildcards.species_id}" | cut -d_ -f1) - season=$(echo "{wildcards.species_id}" | cut -d_ -f2) - python3 {SRCDIR}/deltap/global_code_residents_pixel.py \ - --taxid "$taxid" \ - --season "$season" \ - --current_path {params.current_path} \ - --scenario_path {params.scenario_path} \ - --historic_path {params.pnv_path} \ - --output_path {params.output_path} \ - --z {params.curve} \ - 2>&1 | tee {log} - touch {output.sentinel} - """ - - -# ============================================================================= -# Per-Taxa Delta P Aggregation -# ============================================================================= - - -rule aggregate_delta_p_per_taxa: - """ - Wait for all per-species delta P calculations for a taxa to complete. - Creates a sentinel that downstream rules depend on. - """ - input: - sentinels=get_delta_p_sentinels_for_taxa_scenario, - output: - sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".complete", - shell: - """ - touch {output.sentinel} - """ + script: + str(SRCDIR / "deltap" / "global_code_residents_pixel.py") # ============================================================================= # Per-Taxa Raster Sum # ============================================================================= - rule raster_sum_per_taxa: """ Sum all per-species delta P rasters for a taxa into a single raster. + Implicitly waits for all calculate_delta_p jobs via direct tif dependencies. """ input: - sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".complete", + rasters=get_delta_p_sentinels_for_taxa_scenario, output: - DATADIR / "deltap_sum" / "{scenario}" / CURVE / "{taxa}.tif", + tif=DATADIR / "deltap_sum" / "{scenario}" / CURVE / "{taxa}.tif", params: rasters_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa, + curve=CURVE, threads: workflow.cores log: DATADIR / "logs" / "raster_sum" / "{scenario}" / "{taxa}.log", - shell: - """ - mkdir -p $(dirname {output}) - mkdir -p $(dirname {log}) - python3 {SRCDIR}/utils/raster_sum.py \ - --rasters_directory {params.rasters_dir} \ - --output {output} \ - 2>&1 | tee {log} - """ - + script: + str(SRCDIR / "utils" / "raster_sum.py") # ============================================================================= # Species Totals # ============================================================================= +def get_all_delta_p_tifs_for_scenario(wildcards): + tifs = [] + for taxa in TAXA: + mock = SimpleNamespace(taxa=taxa, scenario=wildcards.scenario) + tifs.extend(get_delta_p_sentinels_for_taxa_scenario(mock)) + return tifs rule species_totals: """ @@ -125,23 +88,15 @@ rule species_totals: Used by delta_p_scaled for normalisation. """ input: - sentinels=expand( - str(DATADIR / "deltap" / "{{scenario}}" / CURVE / "{taxa}" / ".complete"), - taxa=TAXA, - ), + rasters=get_all_delta_p_tifs_for_scenario, output: totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", params: deltaps_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE, log: DATADIR / "logs" / "species_totals_{scenario}.log", - shell: - """ - python3 {SRCDIR}/utils/species_totals.py \ - --deltaps {params.deltaps_dir} \ - --output {output.totals} \ - 2>&1 | tee {log} - """ + script: + str(SRCDIR / "utils" / "species_totals.py") # ============================================================================= @@ -164,18 +119,10 @@ rule delta_p_scaled: diffmap=DATADIR / "habitat" / "{scenario}_diff_area.tif", totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", output: - DATADIR / "deltap_final" / f"scaled_{{scenario}}_{CURVE}.tif", + final=DATADIR / "deltap_final" / f"scaled_{{scenario}}_{CURVE}.tif", params: input_dir=lambda wildcards: DATADIR / "deltap_sum" / wildcards.scenario / CURVE, log: DATADIR / "logs" / "delta_p_scaled_{scenario}.log", - shell: - """ - mkdir -p $(dirname {output}) - python3 {SRCDIR}/deltap/delta_p_scaled.py \ - --input {params.input_dir} \ - --diffmap {input.diffmap} \ - --totals {input.totals} \ - --output {output} \ - 2>&1 | tee {log} - """ + script: + str(SRCDIR / "deltap" / "delta_p_scaled.py") diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index d4313f6..88df282 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -47,7 +47,7 @@ rule gaez_expand: Decompress the GAEZ download. """ output: - gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif" + raster=DATADIR / "food" / "GLCSv11_02_5m.tif" params: filepath="LR/lco/GLCSv11_02_5m.tif" input: @@ -160,20 +160,16 @@ rule combine_gaez_hyde: Combine the GAEZ and Hyde data, adjusting for overflow in cells. """ input: - hyde_projection_file=DATADIR / "food" / "modied_grazing2017AD.prj", - hyde_raster=DATADIR / "food" / "modified_ifgrazing2017AD.asc", + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", + hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif", output: crop=DATADIR / "food" / "crop.tif", pasture=DATADIR / "food" / "pasture.tif" params: - updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", - output_dir=DATADIR / "100m" / "jung_current", - output: - sentinel=DATADIR / "100m" / "jung_current" / ".sentinel", - threads: workflow.cores + output_dir=DATADIR / "food" script: - str(SRCDIR / "prepare_layers" / "make_current_map.py") + str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") # ============================================================================= @@ -189,7 +185,7 @@ rule pnv_100m: input: pnv=DATADIR / "habitat" / "pnv_raw.tif", output: - pnv_100m=DATADIR / "habitat" / "pnv_100m.tif", + pnv_100m=DATADIR / "100m" / "pnv.tif", log: DATADIR / "logs" / "pnv_100m.log", shell: @@ -211,6 +207,24 @@ rule pnv_100m: # Food-enhanced current map at 100m (PRECIOUS) # ============================================================================= +rule current_raws: + """ + Build the LIFE current map, which is Jung with updates applied + and restricted to L1 to match the PNV map restrictions. + """ + input: + updates_sentinel=DATADIR / "habitat" / ".downloaded_updates", + habitat=DATADIR / "100m" / "jung_l2_raw.tif", + crosswalk=DATADIR / "crosswalk.csv", + params: + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", + output_dir=DATADIR / "100m" / "jung_current", + output: + sentinel=DATADIR / "100m" / "jung_current" / ".sentinel", + threads: workflow.cores + script: + str(SRCDIR / "prepare_layers" / "make_current_map.py") + rule build_food_map: """ Build the food-enhanced current habitat map at 100m resolution by combining @@ -221,7 +235,7 @@ rule build_food_map: """ input: jung=ancient(DATADIR / "100m" / "jung_current" / ".sentinel"), - pnv=ancient(DATADIR / "habitat" / "pnv_100m.tif"), + pnv=ancient(DATADIR / "100m" / "pnv.tif"), crop=ancient(DATADIR / "food" / "crop.tif"), pasture=ancient(DATADIR / "food" / "pasture.tif"), output: @@ -229,24 +243,11 @@ rule build_food_map: params: jung_dir=DATADIR / "100m" / "jung_current", output_dir=DATADIR / "100m" / "current", - pnv=DATADIR / "habitat" / "pnv_100m.tif", - crop=DATADIR / "food" / "crop.tif", - pasture=DATADIR / "food" / "pasture.tif", threads: workflow.cores log: DATADIR / "logs" / "build_food_map.log", - shell: - """ - python3 {SRCDIR}/prepare_layers/make_food_current_map.py \ - --current_lvl1 {params.jung_dir} \ - --pnv {params.pnv} \ - --crop {params.crop} \ - --pasture {params.pasture} \ - --output {params.output_dir} \ - -j {threads} \ - 2>&1 | tee {log} - touch {output.sentinel} - """ + script: + str(SRCDIR / "prepare_layers" / "make_food_current_map.py") # ============================================================================= @@ -310,6 +311,7 @@ rule pnv_processed: params: output_dir=DATADIR / "habitat_layers" / "pnv", pixel_scale=config["pixel_scale"], + projection=config["projection"], log: DATADIR / "logs" / "pnv_processed.log", shell: @@ -318,6 +320,7 @@ rule pnv_processed: aoh-habitat-process \ --habitat {input.pnv} \ --scale {params.pixel_scale} \ + --projection {params.projection} \ --output {params.output_dir} \ 2>&1 | tee {log} touch {output.sentinel} diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index ac4b2d1..52db68e 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -35,7 +35,7 @@ rule jung_habitat_map: Fetch the Jung L2 habitat map from Zenodo. """ output: - habitat=DATADIR / "habitat" / "jung_l2_raw.tif" + habitat=DATADIR / "100m" / "jung_l2_raw.tif" params: zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], filename=config["zenodo"]["jung_habitat"]["filename"], diff --git a/workflow/rules/scenario.smk b/workflow/rules/scenario.smk index 1d0f86e..2a2cb91 100644 --- a/workflow/rules/scenario.smk +++ b/workflow/rules/scenario.smk @@ -40,7 +40,7 @@ rule make_arable_map: python3 {SRCDIR}/prepare_layers/make_arable_map.py \ --current {params.current_dir} \ --output {params.output_dir} \ - -j {threads} \ + -j 2 \ 2>&1 | tee {log} touch {output.sentinel} """ @@ -139,7 +139,7 @@ rule make_restore_map: --current {params.current_dir} \ --crosswalk {input.crosswalk} \ --output {params.output_dir} \ - -j {threads} \ + -j 2 \ 2>&1 | tee {log} touch {output.sentinel} """ From 3430fb5cad2744911c3602fa70b711baed6f2795 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 24 Apr 2026 10:11:24 +0100 Subject: [PATCH 17/27] Snakefmt run --- workflow/Snakefile | 15 +++- workflow/rules/aoh.smk | 57 ++++++------- workflow/rules/delta_p.smk | 77 +++++++++++------- workflow/rules/habitat.smk | 145 ++++++++++++++++++---------------- workflow/rules/prepare.smk | 71 +++++++++-------- workflow/rules/scenario.smk | 73 ++++++++--------- workflow/rules/species.smk | 27 ++++--- workflow/rules/validation.smk | 68 ++++++++-------- 8 files changed, 291 insertions(+), 242 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 6466d94..b179ea9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -27,9 +27,11 @@ os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" # Source directory (where this repo lives) SRCDIR = Path(workflow.basedir).parent + # Load configuration configfile: SRCDIR / "config/config.yaml" + # Data directory from environment variable DATADIR = Path(os.environ.get("DATADIR", "/data")) @@ -96,7 +98,12 @@ def get_delta_p_sentinels_for_taxa_scenario(wildcards): # but returning .tif paths instead of sentinel paths species_ids = get_species_ids_for_taxa_scenario(wildcards) return [ - DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa / f".{sid}.done" + DATADIR + / "deltap" + / wildcards.scenario + / CURVE + / wildcards.taxa + / f".{sid}.done" for sid in species_ids ] @@ -139,9 +146,9 @@ rule life: rule summaries: """ - Target: generate species richness and endemism maps. - NOT included in 'all' — run explicitly with: snakemake summaries - """ +Target: generate species richness and endemism maps. +NOT included in 'all' — run explicitly with: snakemake summaries +""" input: DATADIR / "summaries" / "species_richness.tif", DATADIR / "summaries" / "endemism.tif", diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk index 489fb0b..ce7f99c 100644 --- a/workflow/rules/aoh.smk +++ b/workflow/rules/aoh.smk @@ -12,7 +12,6 @@ import os from pathlib import Path - # ============================================================================= # Version Sentinel for AOH Code # ============================================================================= @@ -20,13 +19,14 @@ from pathlib import Path rule aoh_version_sentinel: """ - Track the aoh package version. AOH rules depend on this to trigger - rebuilds when the package updates. - """ +Track the aoh package version. AOH rules depend on this to trigger +rebuilds when the package updates. +""" output: sentinel=DATADIR / ".sentinels" / "aoh_version.txt", run: import subprocess + os.makedirs(os.path.dirname(output.sentinel), exist_ok=True) try: result = subprocess.run( @@ -52,10 +52,10 @@ def aoh_species_inputs(wildcards): era = "historic" if wildcards.scenario == "pnv" else "current" return { "species_data": DATADIR - / "species-info" - / wildcards.taxa - / era - / f"range_{wildcards.species_id}.geojson", + / "species-info" + / wildcards.taxa + / era + / f"range_{wildcards.species_id}.geojson", "habitat_sentinel": ancient( DATADIR / "habitat_layers" / wildcards.scenario / ".sentinel" ), @@ -68,25 +68,28 @@ def aoh_species_inputs(wildcards): rule generate_aoh: """ - Generate Area of Habitat raster for a single species in a given scenario. +Generate Area of Habitat raster for a single species in a given scenario. - Parallelizable: run with `snakemake --cores N` to process multiple species - concurrently. Uses --force-habitat and --pixel-area flags (no mask). +Parallelizable: run with `snakemake --cores N` to process multiple species +concurrently. Uses --force-habitat and --pixel-area flags (no mask). - For current/scenario AOHs: uses current/ era species data. - For pnv AOHs: uses historic/ era species data. - """ +For current/scenario AOHs: uses current/ era species data. +For pnv AOHs: uses historic/ era species data. +""" input: unpack(aoh_species_inputs), output: metadata=DATADIR / "aohs" / "{scenario}" / "{taxa}" / "aoh_{species_id}.json", - params: - habitat_dir=lambda wildcards: DATADIR / "habitat_layers" / wildcards.scenario, - output_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, log: DATADIR / "logs" / "aoh" / "{scenario}" / "{taxa}" / "aoh_{species_id}.log", resources: aoh_slots=1, + params: + habitat_dir=lambda wildcards: DATADIR / "habitat_layers" / wildcards.scenario, + output_dir=lambda wildcards: DATADIR + / "aohs" + / wildcards.scenario + / wildcards.taxa, shell: """ mkdir -p $(dirname {log}) @@ -111,12 +114,12 @@ rule generate_aoh: checkpoint aggregate_aohs_per_taxa: """ - Checkpoint that ensures all AOHs for a taxa/scenario are generated. - Creates a sentinel file when complete. +Checkpoint that ensures all AOHs for a taxa/scenario are generated. +Creates a sentinel file when complete. - This is a checkpoint so downstream rules (delta P) can re-evaluate the DAG - after AOHs are created. - """ +This is a checkpoint so downstream rules (delta P) can re-evaluate the DAG +after AOHs are created. +""" input: metadata=get_all_aoh_metadata_for_taxa_scenario, version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", @@ -136,10 +139,10 @@ checkpoint aggregate_aohs_per_taxa: rule collate_aoh_data: """ - Collate metadata from all AOH JSON files for a scenario into a single CSV. +Collate metadata from all AOH JSON files for a scenario into a single CSV. - Used by validation (current scenario) and for downstream analysis. - """ +Used by validation (current scenario) and for downstream analysis. +""" input: sentinels=lambda wildcards: expand( str(DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete"), @@ -149,10 +152,10 @@ rule collate_aoh_data: version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", output: collated=DATADIR / "aohs" / "{scenario}.csv", - params: - aoh_results_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario, log: DATADIR / "logs" / "collate_aoh_{scenario}.log", + params: + aoh_results_dir=lambda wildcards: DATADIR / "aohs" / wildcards.scenario, shell: """ mkdir -p $(dirname {output.collated}) diff --git a/workflow/rules/delta_p.smk b/workflow/rules/delta_p.smk index c5a583b..9ee11dc 100644 --- a/workflow/rules/delta_p.smk +++ b/workflow/rules/delta_p.smk @@ -23,28 +23,41 @@ from types import SimpleNamespace rule calculate_delta_p: """ - Calculate the change in probability of persistence for a single species - under a given scenario. +Calculate the change in probability of persistence for a single species +under a given scenario. - species_id wildcard is of the form T{taxon_id}A{assessment_id}_{SEASON}, - e.g. T22685505A261477056_RESIDENT. - """ +species_id wildcard is of the form T{taxon_id}A{assessment_id}_{SEASON}, +e.g. T22685505A261477056_RESIDENT. +""" input: current_sentinel=DATADIR / "aohs" / "current" / "{taxa}" / ".complete", scenario_sentinel=DATADIR / "aohs" / "{scenario}" / "{taxa}" / ".complete", pnv_sentinel=DATADIR / "aohs" / "pnv" / "{taxa}" / ".complete", output: - sentinel=DATADIR / "deltap" / "{scenario}" / CURVE / "{taxa}" / ".{species_id}.done", + sentinel=DATADIR + / "deltap" + / "{scenario}" + / CURVE + / "{taxa}" + / ".{species_id}.done", + log: + DATADIR / "logs" / "deltap" / "{scenario}" / "{taxa}" / "{species_id}.log", params: current_path=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, - scenario_path=lambda wildcards: DATADIR / "aohs" / wildcards.scenario / wildcards.taxa, + scenario_path=lambda wildcards: DATADIR + / "aohs" + / wildcards.scenario + / wildcards.taxa, pnv_path=lambda wildcards: DATADIR / "aohs" / "pnv" / wildcards.taxa, taxon_id=lambda wildcards: wildcards.species_id.rsplit("_", 1)[0], season=lambda wildcards: wildcards.species_id.rsplit("_", 1)[1], curve=CURVE, - output_tif=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa / f"deltap_{wildcards.species_id}.tif", - log: - DATADIR / "logs" / "deltap" / "{scenario}" / "{taxa}" / "{species_id}.log", + output_tif=lambda wildcards: DATADIR + / "deltap" + / wildcards.scenario + / CURVE + / wildcards.taxa + / f"deltap_{wildcards.species_id}.tif", script: str(SRCDIR / "deltap" / "global_code_residents_pixel.py") @@ -53,28 +66,35 @@ rule calculate_delta_p: # Per-Taxa Raster Sum # ============================================================================= + rule raster_sum_per_taxa: """ - Sum all per-species delta P rasters for a taxa into a single raster. - Implicitly waits for all calculate_delta_p jobs via direct tif dependencies. - """ +Sum all per-species delta P rasters for a taxa into a single raster. +Implicitly waits for all calculate_delta_p jobs via direct tif dependencies. +""" input: rasters=get_delta_p_sentinels_for_taxa_scenario, output: tif=DATADIR / "deltap_sum" / "{scenario}" / CURVE / "{taxa}.tif", - params: - rasters_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE / wildcards.taxa, - curve=CURVE, - threads: workflow.cores log: DATADIR / "logs" / "raster_sum" / "{scenario}" / "{taxa}.log", + threads: workflow.cores + params: + rasters_dir=lambda wildcards: DATADIR + / "deltap" + / wildcards.scenario + / CURVE + / wildcards.taxa, + curve=CURVE, script: str(SRCDIR / "utils" / "raster_sum.py") + # ============================================================================= # Species Totals # ============================================================================= + def get_all_delta_p_tifs_for_scenario(wildcards): tifs = [] for taxa in TAXA: @@ -82,19 +102,20 @@ def get_all_delta_p_tifs_for_scenario(wildcards): tifs.extend(get_delta_p_sentinels_for_taxa_scenario(mock)) return tifs + rule species_totals: """ - Count the number of species per taxa used in the delta P calculation. - Used by delta_p_scaled for normalisation. - """ +Count the number of species per taxa used in the delta P calculation. +Used by delta_p_scaled for normalisation. +""" input: rasters=get_all_delta_p_tifs_for_scenario, output: totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", - params: - deltaps_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE, log: DATADIR / "logs" / "species_totals_{scenario}.log", + params: + deltaps_dir=lambda wildcards: DATADIR / "deltap" / wildcards.scenario / CURVE, script: str(SRCDIR / "utils" / "species_totals.py") @@ -106,11 +127,11 @@ rule species_totals: rule delta_p_scaled: """ - Generate the final scaled delta P map for a scenario. +Generate the final scaled delta P map for a scenario. - Combines per-taxa delta P sums with the habitat difference map and - species totals to produce the final normalised LIFE output. - """ +Combines per-taxa delta P sums with the habitat difference map and +species totals to produce the final normalised LIFE output. +""" input: taxa_rasters=expand( str(DATADIR / "deltap_sum" / "{{scenario}}" / CURVE / "{taxa}.tif"), @@ -120,9 +141,9 @@ rule delta_p_scaled: totals=DATADIR / "deltap" / "{scenario}" / CURVE / "totals.csv", output: final=DATADIR / "deltap_final" / f"scaled_{{scenario}}_{CURVE}.tif", - params: - input_dir=lambda wildcards: DATADIR / "deltap_sum" / wildcards.scenario / CURVE, log: DATADIR / "logs" / "delta_p_scaled_{scenario}.log", + params: + input_dir=lambda wildcards: DATADIR / "deltap_sum" / wildcards.scenario / CURVE, script: str(SRCDIR / "deltap" / "delta_p_scaled.py") diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index 88df282..caa3948 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -18,21 +18,21 @@ import os from pathlib import Path - # ============================================================================= # GAEZ data # ============================================================================= + rule gaez_download: """ - Fetch the compressed GAEZ data. - """ +Fetch the compressed GAEZ data. +""" output: - archive=DATADIR / "food" / "LR.zip" - params: - url="https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip", + archive=DATADIR / "food" / "LR.zip", log: DATADIR / "logs" / "download_gaez.log", + params: + url="https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip", shell: """ mkdir -p $(dirname {output.archive}) @@ -43,17 +43,17 @@ rule gaez_download: rule gaez_expand: + input: + archive=DATADIR / "food" / "LR.zip", """ - Decompress the GAEZ download. - """ +Decompress the GAEZ download. +""" output: - raster=DATADIR / "food" / "GLCSv11_02_5m.tif" - params: - filepath="LR/lco/GLCSv11_02_5m.tif" - input: - archive=DATADIR / "food" / "LR.zip" + raster=DATADIR / "food" / "GLCSv11_02_5m.tif", log: DATADIR / "logs" / "expand_gaez.log", + params: + filepath="LR/lco/GLCSv11_02_5m.tif", shell: """ unzip -j {input.archive} \ @@ -62,18 +62,20 @@ rule gaez_expand: 2>&1 | tee {log} """ + # ============================================================================= # Hyde data # ============================================================================= + rule hyde_download: """Fetch the compressed HYDE data.""" output: - archive=DATADIR / "food" / "baseline.zip" - params: - url="https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip", + archive=DATADIR / "food" / "baseline.zip", log: DATADIR / "logs" / "download_hyde.log", + params: + url="https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip", shell: """ mkdir -p $(dirname {output.archive}) @@ -86,13 +88,13 @@ rule hyde_download: rule hyde_expand_land_usage_archive: """Extract the inner land usage archive from HYDE.""" input: - archive=DATADIR / "food" / "baseline.zip" + archive=DATADIR / "food" / "baseline.zip", output: - inner_hyde=DATADIR / "food" / "2017AD_lu.zip" - params: - filepath="baseline/zip/2017AD_lu.zip" + inner_hyde=DATADIR / "food" / "2017AD_lu.zip", log: DATADIR / "logs" / "expand_hyde_1.log", + params: + filepath="baseline/zip/2017AD_lu.zip", shell: """ unzip -j {input.archive} \ @@ -105,13 +107,13 @@ rule hyde_expand_land_usage_archive: rule hyde_expand_land_usage_raster: """Extract raster from the inner HYDE land usage archive.""" input: - archive=DATADIR / "food" / "2017AD_lu.zip" + archive=DATADIR / "food" / "2017AD_lu.zip", output: - raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" - params: - filepath="grazing2017AD.asc" + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc", log: DATADIR / "logs" / "expand_hyde_2.log", + params: + filepath="grazing2017AD.asc", shell: """ unzip -j {input.archive} \ @@ -123,13 +125,13 @@ rule hyde_expand_land_usage_raster: rule modify_hyde_pixel_scale: """ - Fix the pixel scale value in the HYDE data — the rounding is not precise - enough to align with the GAEZ data. - """ +Fix the pixel scale value in the HYDE data — the rounding is not precise +enough to align with the GAEZ data. +""" input: - raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc", output: - modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc" + modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", shell: """ sed "s/0.0833333/0.08333333333333333/" \ @@ -139,13 +141,13 @@ rule modify_hyde_pixel_scale: rule add_hyde_projection_info: """ - The HYDE data ships without a projection, so add one so that - the rest of the workflow can mix it with projected raster data. - """ +The HYDE data ships without a projection, so add one so that +the rest of the workflow can mix it with projected raster data. +""" output: - hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj" + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", params: - projection=config["hyde_projection"] + projection=config["hyde_projection"], shell: """ echo '{params.projection}' > {output.hyde_projection_file} @@ -157,17 +159,17 @@ rule add_hyde_projection_info: # ============================================================================= rule combine_gaez_hyde: """ - Combine the GAEZ and Hyde data, adjusting for overflow in cells. - """ +Combine the GAEZ and Hyde data, adjusting for overflow in cells. +""" input: hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif", output: crop=DATADIR / "food" / "crop.tif", - pasture=DATADIR / "food" / "pasture.tif" + pasture=DATADIR / "food" / "pasture.tif", params: - output_dir=DATADIR / "food" + output_dir=DATADIR / "food", script: str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") @@ -176,12 +178,13 @@ rule combine_gaez_hyde: # PNV rescaled to 100m # ============================================================================= + rule pnv_100m: """ - Rescale the PNV map to 100m resolution. - Yirgacheffe can rescale dynamically, but pre-scaling is faster at the cost - of some extra disk space. - """ +Rescale the PNV map to 100m resolution. +Yirgacheffe can rescale dynamically, but pre-scaling is faster at the cost +of some extra disk space. +""" input: pnv=DATADIR / "habitat" / "pnv_raw.tif", output: @@ -207,32 +210,34 @@ rule pnv_100m: # Food-enhanced current map at 100m (PRECIOUS) # ============================================================================= + rule current_raws: """ - Build the LIFE current map, which is Jung with updates applied - and restricted to L1 to match the PNV map restrictions. - """ +Build the LIFE current map, which is Jung with updates applied +and restricted to L1 to match the PNV map restrictions. +""" input: updates_sentinel=DATADIR / "habitat" / ".downloaded_updates", habitat=DATADIR / "100m" / "jung_l2_raw.tif", crosswalk=DATADIR / "crosswalk.csv", - params: - updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", - output_dir=DATADIR / "100m" / "jung_current", output: sentinel=DATADIR / "100m" / "jung_current" / ".sentinel", threads: workflow.cores + params: + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", + output_dir=DATADIR / "100m" / "jung_current", script: str(SRCDIR / "prepare_layers" / "make_current_map.py") + rule build_food_map: """ - Build the food-enhanced current habitat map at 100m resolution by combining - the Jung current map with GAEZ/HYDE crop and pasture fractions. +Build the food-enhanced current habitat map at 100m resolution by combining +the Jung current map with GAEZ/HYDE crop and pasture fractions. - PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. - This is the most expensive step in the pipeline. - """ +PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. +This is the most expensive step in the pipeline. +""" input: jung=ancient(DATADIR / "100m" / "jung_current" / ".sentinel"), pnv=ancient(DATADIR / "100m" / "pnv.tif"), @@ -240,12 +245,12 @@ rule build_food_map: pasture=ancient(DATADIR / "food" / "pasture.tif"), output: sentinel=DATADIR / "100m" / "current" / ".sentinel", + log: + DATADIR / "logs" / "build_food_map.log", + threads: workflow.cores params: jung_dir=DATADIR / "100m" / "jung_current", output_dir=DATADIR / "100m" / "current", - threads: workflow.cores - log: - DATADIR / "logs" / "build_food_map.log", script: str(SRCDIR / "prepare_layers" / "make_food_current_map.py") @@ -254,24 +259,25 @@ rule build_food_map: # Warp current map to habitat_layers resolution (PRECIOUS) # ============================================================================= + rule warp_current: """ - Warp the food-enhanced current map from 100m to the target pixel scale - (5 arc-seconds, ~1.67km at the equator). +Warp the food-enhanced current map from 100m to the target pixel scale +(5 arc-seconds, ~1.67km at the equator). - PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. - """ +PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. +""" input: sentinel=ancient(DATADIR / "100m" / "current" / ".sentinel"), output: sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", + log: + DATADIR / "logs" / "warp_current.log", + threads: workflow.cores params: input_dir=DATADIR / "100m" / "current", output_dir=DATADIR / "habitat_layers" / "current", pixel_scale=config["pixel_scale"], - threads: workflow.cores - log: - DATADIR / "logs" / "warp_current.log", shell: """ mkdir -p {params.output_dir} @@ -297,23 +303,24 @@ rule warp_current: # Process PNV map to habitat_layers (PRECIOUS) # ============================================================================= + rule pnv_processed: """ - Process the PNV map into per-class fractional rasters at the target pixel scale - using aoh-habitat-process. +Process the PNV map into per-class fractional rasters at the target pixel scale +using aoh-habitat-process. - PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. - """ +PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. +""" input: pnv=ancient(DATADIR / "habitat" / "pnv_raw.tif"), output: sentinel=DATADIR / "habitat_layers" / "pnv" / ".sentinel", + log: + DATADIR / "logs" / "pnv_processed.log", params: output_dir=DATADIR / "habitat_layers" / "pnv", pixel_scale=config["pixel_scale"], projection=config["projection"], - log: - DATADIR / "logs" / "pnv_processed.log", shell: """ set -e diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index 52db68e..e69be79 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -11,15 +11,15 @@ import os from pathlib import Path - # ============================================================================= # Crosswalk Table # ============================================================================= + rule convert_crosswalk: """ - Generate a crosswalk between IUCN Habitat classes and Jung raster pixel values. - """ +Generate a crosswalk between IUCN Habitat classes and Jung raster pixel values. +""" output: crosswalk=DATADIR / "crosswalk.csv", script: @@ -30,17 +30,18 @@ rule convert_crosswalk: # Jung L2 habitat map # ============================================================================= + rule jung_habitat_map: """ - Fetch the Jung L2 habitat map from Zenodo. - """ +Fetch the Jung L2 habitat map from Zenodo. +""" output: - habitat=DATADIR / "100m" / "jung_l2_raw.tif" + habitat=DATADIR / "100m" / "jung_l2_raw.tif", + log: + DATADIR / "logs" / "download_habitat.log", params: zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], filename=config["zenodo"]["jung_habitat"]["filename"], - log: - DATADIR / "logs" / "download_habitat.log", shell: """ mkdir -p $(dirname {output.habitat}) @@ -54,16 +55,16 @@ rule jung_habitat_map: rule jung_habitat_updates: """ - Fetch the Jung habitat map update masks from Zenodo. - """ +Fetch the Jung habitat map update masks from Zenodo. +""" output: - sentinel=DATADIR / "habitat" / ".downloaded_updates" + sentinel=DATADIR / "habitat" / ".downloaded_updates", + log: + DATADIR / "logs" / "download_habitat_updates.log", params: habitat_dir=DATADIR / "habitat", zenodo_id=config["zenodo"]["jung_habitat_updates"]["zenodo_id"], filename=config["zenodo"]["jung_habitat_updates"]["filename"], - log: - DATADIR / "logs" / "download_habitat_updates.log", shell: """ mkdir -p {params.habitat_dir} @@ -80,17 +81,18 @@ rule jung_habitat_updates: # Jung PNV map # ============================================================================= + rule jung_pnv_map: """ - Fetch the Jung PNV map from Zenodo. - """ +Fetch the Jung PNV map from Zenodo. +""" output: - habitat=DATADIR / "habitat" / "pnv_raw.tif" + habitat=DATADIR / "habitat" / "pnv_raw.tif", + log: + DATADIR / "logs" / "download_pnv.log", params: zenodo_id=config["zenodo"]["jung_pnv"]["zenodo_id"], filename=config["zenodo"]["jung_pnv"]["filename"], - log: - DATADIR / "logs" / "download_pnv.log", shell: """ mkdir -p $(dirname {output.habitat}) @@ -106,17 +108,18 @@ rule jung_pnv_map: # Elevation layers (precious — only rebuild if explicitly deleted) # ============================================================================= + rule download_elevation: """ - Download raw elevation DEM from Zenodo. - """ +Download raw elevation DEM from Zenodo. +""" output: elevation=DATADIR / "elevation.tif", + log: + DATADIR / "logs" / "download_elevation.log", params: zenodo_id=config["zenodo"]["elevation"]["zenodo_id"], filename=config["zenodo"]["elevation"]["filename"], - log: - DATADIR / "logs" / "download_elevation.log", shell: """ reclaimer zenodo --zenodo_id {params.zenodo_id} \ @@ -128,18 +131,18 @@ rule download_elevation: rule elevation_max: """ - Warp elevation to target projection and scale using max resampling. - Precious: only runs if output doesn't exist. - """ +Warp elevation to target projection and scale using max resampling. +Precious: only runs if output doesn't exist. +""" input: elevation=ancient(DATADIR / "elevation.tif"), output: elevation_max=DATADIR / "elevation-max.tif", - params: - pixel_scale=config["pixel_scale"], - threads: workflow.cores log: DATADIR / "logs" / "elevation_max.log", + threads: workflow.cores + params: + pixel_scale=config["pixel_scale"], shell: """ gdalwarp \ @@ -156,18 +159,18 @@ rule elevation_max: rule elevation_min: """ - Warp elevation to target projection and scale using min resampling. - Precious: only runs if output doesn't exist. - """ +Warp elevation to target projection and scale using min resampling. +Precious: only runs if output doesn't exist. +""" input: elevation=ancient(DATADIR / "elevation.tif"), output: elevation_min=DATADIR / "elevation-min.tif", - params: - pixel_scale=config["pixel_scale"], - threads: workflow.cores log: DATADIR / "logs" / "elevation_min.log", + threads: workflow.cores + params: + pixel_scale=config["pixel_scale"], shell: """ gdalwarp \ diff --git a/workflow/rules/scenario.smk b/workflow/rules/scenario.smk index 2a2cb91..efd92b9 100644 --- a/workflow/rules/scenario.smk +++ b/workflow/rules/scenario.smk @@ -15,26 +15,26 @@ import os from pathlib import Path - # ============================================================================= # Arable scenario # ============================================================================= + rule make_arable_map: """ - Generate the arable scenario map at 100m resolution. - All non-urban land is converted to arable. - """ +Generate the arable scenario map at 100m resolution. +All non-urban land is converted to arable. +""" input: current_sentinel=DATADIR / "100m" / "current" / ".sentinel", output: sentinel=DATADIR / "100m" / "arable" / ".sentinel", + log: + DATADIR / "logs" / "make_arable_map.log", + threads: workflow.cores params: current_dir=DATADIR / "100m" / "current", output_dir=DATADIR / "100m" / "arable", - threads: workflow.cores - log: - DATADIR / "logs" / "make_arable_map.log", shell: """ python3 {SRCDIR}/prepare_layers/make_arable_map.py \ @@ -48,20 +48,20 @@ rule make_arable_map: rule warp_arable: """ - Warp the arable scenario map from 100m to the target pixel scale. - PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. - """ +Warp the arable scenario map from 100m to the target pixel scale. +PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. +""" input: sentinel=ancient(DATADIR / "100m" / "arable" / ".sentinel"), output: sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", + log: + DATADIR / "logs" / "warp_arable.log", + threads: workflow.cores params: input_dir=DATADIR / "100m" / "arable", output_dir=DATADIR / "habitat_layers" / "arable", pixel_scale=config["pixel_scale"], - threads: workflow.cores - log: - DATADIR / "logs" / "warp_arable.log", shell: """ mkdir -p {params.output_dir} @@ -85,20 +85,20 @@ rule warp_arable: rule diff_map_arable: """ - Generate the area difference map between current and arable habitat layers. - Used by the delta P scaling step. - """ +Generate the area difference map between current and arable habitat layers. +Used by the delta P scaling step. +""" input: current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", arable_sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", output: DATADIR / "habitat" / "arable_diff_area.tif", + log: + DATADIR / "logs" / "diff_map_arable.log", + threads: workflow.cores params: current_dir=DATADIR / "habitat_layers" / "current", arable_dir=DATADIR / "habitat_layers" / "arable", - threads: workflow.cores - log: - DATADIR / "logs" / "diff_map_arable.log", shell: """ mkdir -p $(dirname {output}) @@ -115,23 +115,24 @@ rule diff_map_arable: # Restore scenario # ============================================================================= + rule make_restore_map: """ - Generate the restore scenario map at 100m resolution. - Agricultural land is restored to PNV habitat. - """ +Generate the restore scenario map at 100m resolution. +Agricultural land is restored to PNV habitat. +""" input: current_sentinel=DATADIR / "100m" / "current" / ".sentinel", pnv=DATADIR / "habitat" / "pnv_raw.tif", crosswalk=DATADIR / "crosswalk.csv", output: sentinel=DATADIR / "100m" / "restore" / ".sentinel", + log: + DATADIR / "logs" / "make_restore_map.log", + threads: workflow.cores params: current_dir=DATADIR / "100m" / "current", output_dir=DATADIR / "100m" / "restore", - threads: workflow.cores - log: - DATADIR / "logs" / "make_restore_map.log", shell: """ python3 {SRCDIR}/prepare_layers/make_restore_map.py \ @@ -147,20 +148,20 @@ rule make_restore_map: rule warp_restore: """ - Warp the restore scenario map from 100m to the target pixel scale. - PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. - """ +Warp the restore scenario map from 100m to the target pixel scale. +PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. +""" input: sentinel=ancient(DATADIR / "100m" / "restore" / ".sentinel"), output: sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", + log: + DATADIR / "logs" / "warp_restore.log", + threads: workflow.cores params: input_dir=DATADIR / "100m" / "restore", output_dir=DATADIR / "habitat_layers" / "restore", pixel_scale=config["pixel_scale"], - threads: workflow.cores - log: - DATADIR / "logs" / "warp_restore.log", shell: """ mkdir -p {params.output_dir} @@ -184,19 +185,19 @@ rule warp_restore: rule diff_map_restore: """ - Generate the area difference map between current and restore habitat layers. - """ +Generate the area difference map between current and restore habitat layers. +""" input: current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", restore_sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", output: DATADIR / "habitat" / "restore_diff_area.tif", + log: + DATADIR / "logs" / "diff_map_restore.log", + threads: workflow.cores params: current_dir=DATADIR / "habitat_layers" / "current", restore_dir=DATADIR / "habitat_layers" / "restore", - threads: workflow.cores - log: - DATADIR / "logs" / "diff_map_restore.log", shell: """ mkdir -p $(dirname {output}) diff --git a/workflow/rules/species.smk b/workflow/rules/species.smk index 1afaa91..db9aa27 100644 --- a/workflow/rules/species.smk +++ b/workflow/rules/species.smk @@ -16,28 +16,29 @@ checkpoint extract_species_data: """ - Extract species data from PostgreSQL database. +Extract species data from PostgreSQL database. - This is a checkpoint because the number of output GeoJSON files is only - known after extraction. Each taxa produces N species files in both current/ - and historic/ subdirectories. +This is a checkpoint because the number of output GeoJSON files is only +known after extraction. Each taxa produces N species files in both current/ +and historic/ subdirectories. - Environment variables required: - DB_HOST, DB_NAME, DB_USER, DB_PASSWORD - """ +Environment variables required: + DB_HOST, DB_NAME, DB_USER, DB_PASSWORD +""" output: current_report=DATADIR / "species-info" / "{taxa}" / "current" / "report.csv", historic_report=DATADIR / "species-info" / "{taxa}" / "historic" / "report.csv", + log: + DATADIR / "logs" / "extract_species_{taxa}.log", + threads: workflow.cores + resources: + db_connections=1, params: classname="{taxa}", output_dir=lambda wildcards: DATADIR / "species-info" / wildcards.taxa, projection=config["projection"], - overrides=lambda wildcards: DATADIR / config["optional_inputs"]["species_overrides"], - resources: - db_connections=1, - threads: workflow.cores - log: - DATADIR / "logs" / "extract_species_{taxa}.log", + overrides=lambda wildcards: DATADIR + / config["optional_inputs"]["species_overrides"], shell: """ OVERRIDES_ARG="" diff --git a/workflow/rules/validation.smk b/workflow/rules/validation.smk index bc5ea90..48e20ec 100644 --- a/workflow/rules/validation.smk +++ b/workflow/rules/validation.smk @@ -13,9 +13,9 @@ rule model_validation: """ - Perform statistical validation of AOH models (Dahal et al. methodology). - Runs on the "current" scenario AOHs only. - """ +Perform statistical validation of AOH models (Dahal et al. methodology). +Runs on the "current" scenario AOHs only. +""" input: collated=DATADIR / "aohs" / "current.csv", version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", @@ -40,19 +40,19 @@ rule model_validation: rule species_richness: """ - Calculate species richness from current AOH rasters. - NOT included in 'all' — use the 'summaries' target explicitly. - """ +Calculate species richness from current AOH rasters. +NOT included in 'all' — use the 'summaries' target explicitly. +""" input: aoh_sentinel=DATADIR / "aohs" / "current.csv", version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", output: richness=DATADIR / "summaries" / "species_richness.tif", - params: - aohs_folder=DATADIR / "aohs" / "current", - threads: workflow.cores log: DATADIR / "logs" / "species_richness.log", + threads: workflow.cores + params: + aohs_folder=DATADIR / "aohs" / "current", shell: """ mkdir -p $(dirname {output.richness}) @@ -65,20 +65,20 @@ rule species_richness: rule endemism: """ - Calculate endemism from current AOH rasters. - NOT included in 'all' — use the 'summaries' target explicitly. - """ +Calculate endemism from current AOH rasters. +NOT included in 'all' — use the 'summaries' target explicitly. +""" input: aoh_sentinel=DATADIR / "aohs" / "current.csv", species_richness=DATADIR / "summaries" / "species_richness.tif", version_sentinel=DATADIR / ".sentinels" / "aoh_version.txt", output: endemism=DATADIR / "summaries" / "endemism.tif", - params: - aohs_folder=DATADIR / "aohs" / "current", - threads: workflow.cores log: DATADIR / "logs" / "endemism.log", + threads: workflow.cores + params: + aohs_folder=DATADIR / "aohs" / "current", shell: """ aoh-endemism \ @@ -96,20 +96,20 @@ rule endemism: rule fetch_gbif_data: """ - Fetch GBIF occurrence data for a taxa. - Expensive (hours) — only runs if output doesn't exist. +Fetch GBIF occurrence data for a taxa. +Expensive (hours) — only runs if output doesn't exist. - Environment variables required: - GBIF_USERNAME, GBIF_EMAIL, GBIF_PASSWORD - """ +Environment variables required: + GBIF_USERNAME, GBIF_EMAIL, GBIF_PASSWORD +""" input: collated=ancient(DATADIR / "aohs" / "current.csv"), output: sentinel=DATADIR / "validation" / "occurrences" / ".{taxa}_fetched", - params: - output_dir=DATADIR / "validation" / "occurrences", log: DATADIR / "logs" / "fetch_gbif_{taxa}.log", + params: + output_dir=DATADIR / "validation" / "occurrences", shell: """ mkdir -p {params.output_dir} @@ -124,19 +124,25 @@ rule fetch_gbif_data: rule validate_gbif_occurrences: """ - Validate current AOH models against GBIF occurrence data. - """ +Validate current AOH models against GBIF occurrence data. +""" input: gbif_sentinel=DATADIR / "validation" / "occurrences" / ".{taxa}_fetched", aoh_sentinel=ancient(DATADIR / "aohs" / "current" / "{taxa}" / ".complete"), output: validation=DATADIR / "validation" / "occurrences" / "{taxa}.csv", - params: - gbif_data=lambda wildcards: DATADIR / "validation" / "occurrences" / wildcards.taxa, - species_data=lambda wildcards: DATADIR / "species-info" / wildcards.taxa / "current", - aoh_results=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, log: DATADIR / "logs" / "validate_gbif_{taxa}.log", + params: + gbif_data=lambda wildcards: DATADIR + / "validation" + / "occurrences" + / wildcards.taxa, + species_data=lambda wildcards: DATADIR + / "species-info" + / wildcards.taxa + / "current", + aoh_results=lambda wildcards: DATADIR / "aohs" / "current" / wildcards.taxa, shell: """ aoh-validate-occurrences \ @@ -150,8 +156,8 @@ rule validate_gbif_occurrences: rule occurrence_validation: """ - Target rule for GBIF validation for all taxa. - WARNING: Expensive. Only run explicitly: snakemake occurrence_validation - """ +Target rule for GBIF validation for all taxa. +WARNING: Expensive. Only run explicitly: snakemake occurrence_validation +""" input: expand(str(DATADIR / "validation" / "occurrences" / "{taxa}.csv"), taxa=TAXA), From eb6ee7b6f4500976495b125b122806eb5f0cd789 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 27 Apr 2026 13:18:31 +0100 Subject: [PATCH 18/27] Fix bug in fractional coverage when both farm types go up --- prepare_layers/make_food_current_map.py | 41 +++++++++------- tests/test_food_layer.py | 64 ++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 18 deletions(-) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 85d709d..8475002 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -40,7 +40,7 @@ def balance_crop_and_pasture_differences( lcc_data_map: dict[int,np.ndarray], ) -> tuple[float,float]: """ - If we remove one time of agricultural land but expand another, keep them in the same area where possible. + If we remove one type of agricultural land but expand another, keep them in the same area where possible. One thing we know is that reductions are always achievable, as the difference is generated as (GAEZ and HYDE - Jung), so if we have a negative value the initial state is enough to cover that. @@ -115,6 +115,7 @@ def remove_land_cover( lcc_data[pnv_match] += removed_grid[pnv_match] def add_land_cover( + eligible_mask: np.ndarray, lcc_code: int, diff: float, lcc_data_map: dict[int,np.ndarray], @@ -123,16 +124,6 @@ def add_land_cover( agri_raster = lcc_data_map[lcc_code] - # Find areas we can put the new data. This is anywhere we don't already - # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.) - # We know that there should be no partial cells involving crop/pasture at this stage - # because of the balancing we did initially. - excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES - eligible_mask = np.ones_like(agri_raster, dtype=bool) - for excluded_code in excluded_codes: - if excluded_code in lcc_data_map: - eligible_mask &= (lcc_data_map[excluded_code] == 0) - eligible_count = eligible_mask.sum() if eligible_count == 0: return @@ -149,7 +140,7 @@ def add_land_cover( agri_raster[eligible_mask] += per_cell_addition for lcc, lcc_data in lcc_data_map.items(): - if lcc == lcc_code: + if lcc in [CROP_CODE, PASTURE_CODE]: continue lcc_data[eligible_mask & (lcc_data > 0)] -= per_cell_addition @@ -158,7 +149,6 @@ def process_tile( pnv: yg.YirgacheffeLayer, tile: TileInfo, ) -> dict[int,np.ndarray]: - lcc_data_map = { lcc: current_map.read_array(tile.x_position, tile.y_position, tile.width, tile.height) for lcc, current_map in current_maps.items() @@ -174,11 +164,13 @@ def process_tile( if not np.isnan(tile.crop_target): crop_data = lcc_data_map[CROP_CODE] crop_diff = tile.crop_target - (crop_data.sum() / crop_data.size) + assert not np.isnan(crop_diff) else: crop_diff = 0 if not np.isnan(tile.pasture_target): pasture_data = lcc_data_map[PASTURE_CODE] pasture_diff = tile.pasture_target - (pasture_data.sum() / pasture_data.size) + assert not np.isnan(pasture_diff) else: pasture_diff = 0 if (crop_diff == 0) and (pasture_diff == 0): @@ -190,15 +182,16 @@ def process_tile( lcc_data_map, ) - # Order the changes by removals first then additions. In random sampling this was - # important, but not so with a fractional approach. However, we need some consistent - # ordering so we leave this in for consistency. + # Order the changes by removals first then additions. We should only generate the mask + # of eligible cells after we've done removals. diffs = [ (crop_diff, CROP_CODE), (pasture_diff, PASTURE_CODE), ] diffs.sort(key=lambda a: a[0]) + eligible_mask = None + pnv_data = None for diff_value, habitat_code in diffs: if diff_value == 0 or np.isnan(diff_value): @@ -209,7 +202,21 @@ def process_tile( pnv_data = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map) else: - add_land_cover(habitat_code, diff_value, lcc_data_map) + if eligible_mask is None: + # This mask has to be generated after we've done all removals, before + # we start doing additions, so relies on the sort we do above. + # + # Find areas we can put the new data. This is anywhere we don't already + # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.) + # We know that there should be no partial cells involving crop/pasture at this stage + # because of the balancing we did initially. + excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES + eligible_mask = np.ones_like(lcc_data_map[CROP_CODE], dtype=bool) + for excluded_code in excluded_codes: + if excluded_code in lcc_data_map: + eligible_mask &= (lcc_data_map[excluded_code] == 0) + + add_land_cover(eligible_mask, habitat_code, diff_value, lcc_data_map) return lcc_data_map diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index a44481e..7b65266 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -1,9 +1,12 @@ +import math import numpy as np import pytest +from pytest import param as P +import yirgacheffe as yg from prepare_layers.make_food_current_map import balance_crop_and_pasture_differences, \ - CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover + CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover, TileInfo, process_tile @pytest.mark.parametrize( [ @@ -210,6 +213,7 @@ def test_remove_land_less_simple( (np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_other_2_cell) assert (expected_other_2_map == lcc_data_map[2]).all() + @pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ (0.5, 0.5, 0.5), (1.0, 1.0, 0.0), @@ -226,6 +230,7 @@ def test_add_land_simple( } add_land_cover( + np.ones((10, 10), dtype=bool), CROP_CODE, crop_diff, lcc_data_map, @@ -236,6 +241,7 @@ def test_add_land_simple( assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() assert (expected_other_map == lcc_data_map[1]).all() + @pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ (0.25, 0.5, 0.5), (0.5, 1.0, 0.0), @@ -253,6 +259,7 @@ def test_add_land_avoid_excluded( } add_land_cover( + np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(bool), CROP_CODE, crop_diff, lcc_data_map, @@ -264,3 +271,58 @@ def test_add_land_avoid_excluded( assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() assert (expected_other_map == lcc_data_map[1]).all() + + +@pytest.mark.parametrize(["crop_diff", "pasture_diff", "expected_totals"], [ + P(0.25, 0.25, {1: 25, CROP_CODE: 25, PASTURE_CODE: 25, 4: 25}, id="no change"), + P(0.0, 0.0, {1: 75, CROP_CODE: 0, PASTURE_CODE: 0, 4: 25}, id="remove both"), + P(0.25, 0.0, {1: 50, CROP_CODE: 25, PASTURE_CODE: 0, 4: 25}, id="remove pasture, leave crop"), + P(0.0, 0.25, {1: 50, CROP_CODE: 0, PASTURE_CODE: 25, 4: 25}, id="remove crop, leave pasture"), + P(0.5, 0.25, {1: 12.5, CROP_CODE: 50, PASTURE_CODE: 25, 4: 12.5}, id="add crop, leave pasture"), + P(0.25, 0.5, {1: 12.5, CROP_CODE: 25, PASTURE_CODE: 50, 4: 12.5}, id="leave crop, add pasture"), + P(0.5, 0.2, {1: 15, CROP_CODE: 50, PASTURE_CODE: 20, 4: 15}, id="add crop, remove pasture, more add than remove"), + P(0.2, 0.5, {1: 15, CROP_CODE: 20, PASTURE_CODE: 50, 4: 15}, id="remove crop, add pasture, more add than remove"), + P(0.3, 0.1, {1: 35, CROP_CODE: 30, PASTURE_CODE: 10, 4: 25}, id="add crop, remove pasture, more remove than add"), + P(0.1, 0.3, {1: 35, CROP_CODE: 10, PASTURE_CODE: 30, 4: 25}, id="remove crop, add pasture, more remove than add"), + P(0.3, 0.3, {1: 20, CROP_CODE: 30, PASTURE_CODE: 30, 4: 20}, id="all both, but not total"), + P(0.5, 0.5, {1: 0, CROP_CODE: 50, PASTURE_CODE: 50, 4: 0}, id="all both"), + P(0.0, 0.5, {1: 25, CROP_CODE: 0, PASTURE_CODE: 50, 4: 25}, id="replace crop with pasture"), + P(0.5, 0.0, {1: 25, CROP_CODE: 50, PASTURE_CODE: 0, 4: 25}, id="replace pasture with crop"), + P(0.0, 1.0, {1: 0, CROP_CODE: 0, PASTURE_CODE: 100, 4: 0}, id="All pasture, all the time"), + P(1.0, 0.0, {1: 0, CROP_CODE: 100, PASTURE_CODE: 0, 4: 0}, id="All crop, all the time"), +]) +def test_process_tile(crop_diff: float, pasture_diff: float, expected_totals: dict[int, float]) -> None: + # One quarter neither, one quarter crop, one quarter pasture, one quarter other neither + data = np.ones((5, 5)) + raw_lcc_data_map = { + 1: np.pad(data, ((0, 5), (0, 5))), + CROP_CODE: np.pad(data, ((0, 5), (5, 0))), + PASTURE_CODE: np.pad(data, ((5, 0), (0, 5))), + 4: np.pad(data, ((5, 0), (5, 0))), + } + projection = yg.MapProjection("epsg:4326", 1.0, -1.0) + lcc_maps = {lcc: yg.from_array(x, (0, 0), projection) for lcc, x in raw_lcc_data_map.items()} + + # The pnv is just class 1, so that is always the category that will increase, class 4 will only + # only go below its initial value or be the same + pnv_data = np.ones((10, 10)) # All class 1 + pnv_map = yg.from_array(pnv_data, (0, 0), projection) + + tile = TileInfo( + x_position=0, + y_position=0, + width=10, + height=10, + crop_target=crop_diff, + pasture_target=pasture_diff, + ) + + updated_maps = process_tile(lcc_maps, pnv_map, tile) + + for key, expected_value in expected_totals.items(): + print(key, np.sum(updated_maps[key])) + + for key, expected_value in expected_totals.items(): + layer = updated_maps[key] + layer_total = np.sum(layer) + assert math.isclose(layer_total, expected_value, rel_tol=0.000001), f"Failed for {key}" From 46e122cff1431b861973440c88c5d5d270bb9c9e Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 28 Apr 2026 12:06:58 +0100 Subject: [PATCH 19/27] Fix snakemake reformatting mistake --- workflow/rules/habitat.smk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index caa3948..d01540a 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -43,11 +43,9 @@ Fetch the compressed GAEZ data. rule gaez_expand: + """Decompress the GAEZ download.""" input: archive=DATADIR / "food" / "LR.zip", - """ -Decompress the GAEZ download. -""" output: raster=DATADIR / "food" / "GLCSv11_02_5m.tif", log: From bd9fd2b35ab53b0ebf15529524c0058728027867 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 28 Apr 2026 12:18:12 +0100 Subject: [PATCH 20/27] Remove deprecated shell scripts --- scripts/generate_food_map.sh | 126 ------------- scripts/run.sh | 355 ----------------------------------- 2 files changed, 481 deletions(-) delete mode 100755 scripts/generate_food_map.sh delete mode 100755 scripts/run.sh diff --git a/scripts/generate_food_map.sh b/scripts/generate_food_map.sh deleted file mode 100755 index ea8cd27..0000000 --- a/scripts/generate_food_map.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -# -# This script will generate the updated habitat map for the FOOD paper based on the original Jung habitat map. -# Note that this is not derived directly from the Jung map, but rather from the simplified version used in LIFE, -# which has all habitats at level 1 except anthropomorphic ones at level 2. As such this script assumes you have -# downloaded and generated `current_raw.tif` from the original LIFE pipeline (see run.sh) - -set -e -set -x - - -# We know we use two Go tools, so add go/bin to our path as in slurm world they're likely -# to be installed locally -export PATH="${PATH}":"${HOME}"/go/bin -if ! hash reclaimer 2>/dev/null; then - echo "Please ensure reclaimer is available" - exit 1 -fi - -# Detect if we're running under SLURM -if [[ -n "${SLURM_JOB_ID}" ]]; then - # Slurm users will probably need to customise this - # shellcheck disable=SC1091 - source "${HOME}"/venvs/life/bin/activate - cd "${HOME}"/dev/life - PROCESS_COUNT="${SLURM_JOB_CPUS_PER_NODE}" -else - PROCESS_COUNT=$(nproc --all) -fi - -if [ -z "${DATADIR}" ]; then - echo "Please specify $DATADIR" - exit 1 -fi - -if [ -z "${VIRTUAL_ENV}" ]; then - echo "Please specify run in a virtualenv" - exit 1 -fi - -if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then - if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then - reclaimer zenodo --zenodo_id 4058819 \ - --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ - --extract \ - --output "${DATADIR}"/habitat/jung_l2_raw.tif - fi - - if [ ! -d "${DATADIR}"/habitat/lvl2_changemasks_ver004 ]; then - reclaimer zenodo --zenodo_id 4058819 \ - --filename lvl2_changemasks_ver004.zip \ - --extract \ - --output "${DATADIR}"/habitat/ - fi - - if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then - python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ - --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/100m/jung_current \ - --sentinel "${DATADIR}"/100m/jung_current/.sentinel \ - -j "${PROCESS_COUNT}" - fi -fi - -if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then - reclaimer zenodo --zenodo_id 4038749 \ - --filename pnv_lvl1_004.zip \ - --extract \ - --output "${DATADIR}"/habitat/pnv_raw.tif -fi - -if [ ! -f "${DATADIR}"/habitat/pnv_100m.tif ]; then - # In theory we don't need to do this, as Yirgacheffe can rescale dynamically - # but in practice it's faster if we just do this once like this, at the cost - # of some extra storage requirements - gdal -tr 0.000898315284120 -0.000898315284120 \ - -r near \ - -tap \ - -multi -wo NUM_THREADS=ALL_CPUS \ - -co COMPRESS=LZW -co NUM_THREADS=ALL_CPUS \ - "${DATADIR}"/habitat/pnv_raw.tif - "${DATADIR}"/habitat/pnv_100m.tif -fi - -# Get GAEZ data -if [ ! -f "${DATADIR}"/habitat/GLCSv11_02_5m.tif ]; then - if [ ! -f "${DATADIR}"/habitat/LR.zip ]; then - curl -o "${DATADIR}"/habitat/LR.zip https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip - fi - unzip -j "${DATADIR}"/habitat/LR.zip LR/lco/GLCSv11_02_5m.tif -d "${DATADIR}"/habitat/ -fi - -# Get HYDE 3.2 data -if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.asc ]; then - if [ ! -f "${DATADIR}"/habitat/baseline.zip ]; then - curl -o "${DATADIR}"/habitat/baseline.zip "https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip" - fi - if [ ! -f "${DATADIR}"/habitat/2017AD_lu.zip ]; then - unzip -j "${DATADIR}"/habitat/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/habitat/ - fi - if [ ! -f "${DATADIR}"/habitat/grazing2017AD.asc ]; then - unzip -j "${DATADIR}"/habitat/2017AD_lu.zip grazing2017AD.asc -d "${DATADIR}"/habitat/ - fi - # The pixel scale in the two files have different rounding despite covering the same area - # and so this makes them align. - sed "s/0.0833333/0.08333333333333333/" "${DATADIR}"/habitat/grazing2017AD.asc > "${DATADIR}"/habitat/modified_grazing2017AD.asc -fi - -if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.prj ]; then - echo 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' > "${DATADIR}"/habitat/modified_grazing2017AD.prj -fi - -if [ ! -f "${DATADIR}"/habitat/crop.tif ] || [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then - python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/habitat/GLCSv11_02_5m.tif \ - --hyde "${DATADIR}"/habitat/modified_grazing2017AD.asc \ - --output "${DATADIR}"/habitat/ -fi - -if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then - python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/100m/jung_current \ - --pnv "${DATADIR}"/habitat/pnv_100m.tif \ - --crop "${DATADIR}"/habitat/crop.tif \ - --pasture "${DATADIR}"/habitat/pasture.tif \ - --output "${DATADIR}"/100m/current -fi diff --git a/scripts/run.sh b/scripts/run.sh deleted file mode 100755 index 88a9fcb..0000000 --- a/scripts/run.sh +++ /dev/null @@ -1,355 +0,0 @@ -#!/bin/bash -# -# Assumes you've set up a python virtual environement in the current directory. -# -# In addition to the Python environemnt, you will need the following extra command line tools: -# -# https://github.com/quantifyearth/reclaimer - used to download inputs from Zenodo directly -# https://github.com/quantifyearth/littlejohn - used to run batch jobs in parallel - -# Set shell script to exit on first error (-e) and to output commands being run to make -# reviewing logs easier (-x) -set -e -set -x - -# We know we use two Go tools, so add go/bin to our path as in slurm world they're likely -# to be installed locally -export PATH="${PATH}":"${HOME}"/go/bin -if ! hash littlejohn 2>/dev/null; then - echo "Please ensure littlejohn is available" - exit 1 -fi -if ! hash reclaimer 2>/dev/null; then - echo "Please ensure reclaimer is available" - exit 1 -fi - -# Detect if we're running under SLURM -if [[ -n "${SLURM_JOB_ID}" ]]; then - # Slurm users will probably need to customise this - # shellcheck disable=SC1091 - source "${HOME}"/venvs/life/bin/activate - cd "${HOME}"/dev/life - PROCESS_COUNT="${SLURM_JOB_CPUS_PER_NODE}" -else - PROCESS_COUNT=$(nproc --all) -fi - -if [ -z "${DATADIR}" ]; then - echo "Please specify $DATADIR" - exit 1 -fi - -if [ -z "${VIRTUAL_ENV}" ]; then - echo "Please specify run in a virtualenv" - exit 1 -fi - -# Experiment configuration. You can override these variables in the environment and it will use those -# values rather than the defaults specified here. -# shellcheck disable=SC2206 -declare -a SCENARIOS=(${SCENARIOS:-"arable" "restore" "restore_all" "urban" "pasture" "restore_agriculture"}) -# shellcheck disable=SC2206 -declare -a TAXAS=(${TAXAS:-"AMPHIBIA" "AVES" "MAMMALIA" "REPTILIA"}) -export CURVE=${CURVE:0.25} -export PIXEL_SCALE=${PIXEL_SCALE:-0.016666666666667} - -check_scenario() { - local target="$1" - for scenario in "${SCENARIOS[@]}"; do - if [[ "$scenario" == "$target" ]]; then - return 0 - fi - done - return 1 -} - -if [ ! -f "${DATADIR}"/crosswalk.csv ]; then - python3 ./prepare_layers/generate_crosswalk.py --output "${DATADIR}"/crosswalk.csv -fi - -# Get habitat layer and prepare for use -if [ ! -f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then - if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then - reclaimer zenodo --zenodo_id 4058819 \ - --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ - --extract \ - --output "${DATADIR}"/habitat/jung_l2_raw.tif - fi - - if [ ! -d "${DATADIR}"/habitat/lvl2_changemasks_ver004 ]; then - reclaimer zenodo --zenodo_id 4058819 \ - --filename lvl2_changemasks_ver004.zip \ - --extract \ - --output "${DATADIR}"/habitat/ - fi - - if [ ! -f "${DATADIR}"/100m/current/.sentinel ]; then - python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ - --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/100m/current \ - --sentinel "${DATADIR}"/100m/current/.sentinel \ - -j 16 - fi - - if [ ! -f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then - mkdir -p "${DATADIR}"/5_arc_seconds/current - for d in "${DATADIR}"/100m/current/*.tif; do - basename=$(basename "${d}") - gdalwarp -t_srs EPSG:4326 \ - -tr 0.016666666666667 -0.016666666666667 \ - -r average \ - -multi \ - -co COMPRESS=LZW \ - -co NUM_THREADS=16 \ - -wo NUM_THREADS=16 \ - "${d}" \ - "${DATADIR}"/5_arc_seconds/current/"${basename}" - done - touch "${DATADIR}"/5_arc_seconds/current/.sentinel - fi -fi - -# Get PNV layer and prepare for use -if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then - reclaimer zenodo --zenodo_id 4038749 \ - --filename pnv_lvl1_004.zip \ - --extract \ - --output "${DATADIR}"/habitat/pnv_raw.tif -fi - -if [ ! -d "${DATADIR}"/5_arc_seconds/pnv ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/pnv_raw.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/5_arc_seconds/pnv/ -fi - -# Generate the arable scenario map -if check_scenario "arable"; then - if [ ! -d "${DATADIR}"/habitat/arable ]; then - python3 ./prepare_layers/make_arable_map.py --current "${DATADIR}"/100m/current \ - --output "${DATADIR}"/100m/arable - fi - - if [ ! -f "${DATADIR}"/5_arc_seconds/arable/.sentinel ]; then - mkdir -p "${DATADIR}"/5_arc_seconds/arable - for d in "${DATADIR}"/100m/arable/*.tif; do - basename=$(basename "${d}") - gdalwarp -t_srs EPSG:4326 \ - -tr 0.016666666666667 -0.016666666666667 \ - -r average \ - -multi \ - -co COMPRESS=LZW \ - -co NUM_THREADS=16 \ - -wo NUM_THREADS=16 \ - "${d}" \ - "${DATADIR}"/5_arc_seconds/arable/"${basename}" - done - touch "${DATADIR}"/5_arc_seconds/arable/.sentinel - fi - - if [ ! -f "${DATADIR}"/habitat/arable_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map_2.py --current "${DATADIR}"/5_arc_seconds/current \ - --scenario "${DATADIR}"/5_arc_seconds/arable \ - --output "${DATADIR}"/habitat/arable_diff_area.tif - fi -fi - -# Generate the pasture scenario map -# if check_scenario "pasture"; then -# if [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then -# python3 ./prepare_layers/make_pasture_map.py --current "${DATADIR}"/habitat/current_raw.tif \ -# --output "${DATADIR}"/habitat/pasture.tif -# fi -# -# if [ ! -d "${DATADIR}"/habitat_maps/pasture ]; then -# aoh-habitat-process --habitat "${DATADIR}"/habitat/pasture.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat_maps/pasture/ -# fi -# -# if [ ! -f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then -# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ -# --scenario "${DATADIR}"/habitat/pasture.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat/pasture_diff_area.tif -# fi -# fi - -# Generate the restore map -if check_scenario "restore"; then - if [ ! -d "${DATADIR}"/habitat/restore ]; then - python3 ./prepare_layers/make_restore_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/100m/current \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/100m/restore - fi - - if [ ! -f "${DATADIR}"/5_arc_seconds/restore/.sentinel ]; then - mkdir -p "${DATADIR}"/5_arc_seconds/restore - for d in "${DATADIR}"/100m/restore/*.tif; do - basename=$(basename "${d}") - gdalwarp -t_srs EPSG:4326 \ - -tr 0.016666666666667 -0.016666666666667 \ - -r average \ - -multi \ - -co COMPRESS=LZW \ - -co NUM_THREADS=16 \ - -wo NUM_THREADS=16 \ - "${d}" \ - "${DATADIR}"/5_arc_seconds/restore/"${basename}" - done - touch "${DATADIR}"/5_arc_seconds/restore/.sentinel - fi - - if [ ! -f "${DATADIR}"/100m/restore_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/100m/current \ - --scenario "${DATADIR}"/100m/restore \ - --output "${DATADIR}"/100m/restore_diff_area.tif - fi - - if [ ! -f "${DATADIR}"/5_arc_seconds/restore_diff_area.tif ]; then - # In theory we should just use gdalwarp here, but it kept running out of - # memory, so we use our own Yirgacheffe version that just does the same thing in small - # chunks. - fi -fi -# -# # Generate the restore_agriculture map -# if check_scenario "restore_agriculture"; then -# if [ ! -f "${DATADIR}"/habitat/restore_agriculture.tif ]; then -# python3 ./prepare_layers/make_restore_agriculture_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ -# --current "${DATADIR}"/habitat/current_raw.tif \ -# --crosswalk "${DATADIR}"/crosswalk.csv \ -# --output "${DATADIR}"/habitat/restore_agriculture.tif -# fi -# -# if [ ! -d "${DATADIR}"/habitat_maps/restore_agriculture ]; then -# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_agriculture.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat_maps/restore_agriculture/ -# fi -# -# if [ ! -f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then -# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ -# --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif -# fi -# fi -# -# # Generate the restore all map -# if check_scenario "restore_all"; then -# if [ ! -f "${DATADIR}"/habitat/restore_all.tif ]; then -# python3 ./prepare_layers/make_restore_all_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ -# --current "${DATADIR}"/habitat/current_raw.tif \ -# --crosswalk "${DATADIR}"/crosswalk.csv \ -# --output "${DATADIR}"/habitat/restore_all.tif -# fi -# -# if [ ! -d "${DATADIR}"/habitat_maps/restore_all ]; then -# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_all.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat_maps/restore_all/ -# fi -# -# if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then -# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ -# --scenario "${DATADIR}"/habitat/restore_all.tif \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat/restore_all_diff_area.tif -# fi -# fi -# -# # Generate urban all map -# if check_scenario "urban"; then -# if [ ! -d "${DATADIR}"/habitat_maps/urban ]; then -# python3 ./prepare_layers/make_constant_habitat.py --examplar "${DATADIR}"/habitat_maps/arable/lcc_1401.tif \ -# --habitat_code 14.5 \ -# --crosswalk "${DATADIR}"/crosswalk.csv \ -# --output "${DATADIR}"/habitat_maps/urban -# fi -# -# if [ ! -f "${DATADIR}"/habitat/urban_diff_area.tif ]; then -# python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ -# --habitat_code 14.5 \ -# --crosswalk "${DATADIR}"/crosswalk.csv \ -# --scale "${PIXEL_SCALE}" \ -# --output "${DATADIR}"/habitat/urban_diff_area.tif -# fi -# fi - -# Fetch and prepare the elevation layers -if [ ! -f "${DATADIR}"/elevation.tif ]; then - reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output "${DATADIR}"/elevation.tif -fi -if [ ! -f "${DATADIR}"/elevation-max.tif ]; then - gdalwarp -t_srs EPSG:4326 -tr "${PIXEL_SCALE}" -"${PIXEL_SCALE}" -r max -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation.tif "${DATADIR}"/elevation-max.tif -fi -if [ ! -f "${DATADIR}"/elevation-min.tif ]; then - gdalwarp -t_srs EPSG:4326 -tr "${PIXEL_SCALE}" -"${PIXEL_SCALE}" -r min -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation.tif "${DATADIR}"/elevation-min.tif -fi - -# Get species data per taxa from IUCN data -for TAXA in "${TAXAS[@]}" -do - if [ ! -d "${DATADIR}"/species-info/"${TAXA}"/ ]; then - if [ -f "${DATADIR}"/overrides.csv ]; then - python3 ./prepare_species/extract_species_psql.py --class "${TAXA}" \ - --output "${DATADIR}"/species-info/"${TAXA}"/ \ - --projection "EPSG:4326" \ - --overrides "${DATADIR}"/overrides.csv - else - python3 ./prepare_species/extract_species_psql.py --class "${TAXA}" \ - --output "${DATADIR}"/species-info/"${TAXA}"/ \ - --projection "EPSG:4326" - fi - fi -done - -# Generate the batch job input CSVs -python3 ./utils/speciesgenerator.py --datadir "${DATADIR}" \ - --output "${DATADIR}"/aohbatch.csv \ - --scenarios "${SCENARIOS[@]}" -python3 ./utils/persistencegenerator.py --datadir "${DATADIR}" \ - --curve "${CURVE}" \ - --output "${DATADIR}"/persistencebatch.csv \ - --scenarios "${SCENARIOS[@]}" - -# Calculate all the AoHs -littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc -- --force-habitat --pixel-area - -# Generate validation summaries -aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ --output "${DATADIR}"/aohs/current.csv -aoh-collate-data --aoh_results "${DATADIR}"/aohs/pnv/ --output "${DATADIR}"/aohs/pnv.csv -for SCENARIO in "${SCENARIOS[@]}" -do - aoh-collate-data --aoh_results "${DATADIR}"/aohs/"${SCENARIO}"/ --output "${DATADIR}"/aohs/"${SCENARIO}".csv -done - -# Calculate predictors from AoHs -aoh-species-richness --aohs_folder "${DATADIR}"/aohs/current/ \ - --output "${DATADIR}"/predictors/species_richness.tif -aoh-endemism --aohs_folder "${DATADIR}"/aohs/current/ \ - --species_richness "${DATADIR}"/predictors/species_richness.tif \ - --output "${DATADIR}"/predictors/endemism.tif - -# Calculate the per species Delta P values -littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/persistencebatch.log -c "${DATADIR}"/persistencebatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./deltap/global_code_residents_pixel.py - -for SCENARIO in "${SCENARIOS[@]}" -do - for TAXA in "${TAXAS[@]}" - do - python3 ./utils/raster_sum.py --rasters_directory "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/"${TAXA}"/ --output "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/"${TAXA}".tif - done - - python3 ./utils/species_totals.py --deltaps "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/ --output "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv - - # Generate final map - python3 ./deltap/delta_p_scaled.py --input "${DATADIR}"/deltap_sum/"${SCENARIO}"/"${CURVE}"/ \ - --diffmap "${DATADIR}"/habitat/"${SCENARIO}"_diff_area.tif \ - --totals "${DATADIR}"/deltap/"${SCENARIO}"/"${CURVE}"/totals.csv \ - --output "${DATADIR}"/deltap_final/scaled_"${SCENARIO}"_"${CURVE}".tif -done From 64361652b0950a948f93cf936e743fe33ecc05e6 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 29 Apr 2026 07:36:07 +0100 Subject: [PATCH 21/27] Improve handling of preserve codes --- prepare_layers/make_food_current_map.py | 80 +++++++++++++---------- tests/test_food_layer.py | 86 ++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 36 deletions(-) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 8475002..5d6ca36 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -41,9 +41,6 @@ def balance_crop_and_pasture_differences( ) -> tuple[float,float]: """ If we remove one type of agricultural land but expand another, keep them in the same area where possible. - - One thing we know is that reductions are always achievable, as the difference is generated as - (GAEZ and HYDE - Jung), so if we have a negative value the initial state is enough to cover that. """ # Either both are a reduction or both an increase, or at least one is null, so no # balancing required. @@ -139,10 +136,16 @@ def add_land_cover( per_cell_addition = min(per_cell_addition, 1.0) agri_raster[eligible_mask] += per_cell_addition + # Remove from the other land cover classes. This assumes that coming into this + # stage the LCC pixels are: + # * only non-zero in a single layer + # * only starting at 100% for lcc, lcc_data in lcc_data_map.items(): - if lcc in [CROP_CODE, PASTURE_CODE]: + if lcc in [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES: continue lcc_data[eligible_mask & (lcc_data > 0)] -= per_cell_addition + lcc_data[eligible_mask] = np.maximum(lcc_data[eligible_mask], 0.0) + def process_tile( current_maps: dict[int,yg.YirgacheffeLayer], @@ -182,44 +185,53 @@ def process_tile( lcc_data_map, ) - # Order the changes by removals first then additions. We should only generate the mask - # of eligible cells after we've done removals. + # We first do all the removals and then the additions diffs = [ (crop_diff, CROP_CODE), (pasture_diff, PASTURE_CODE), ] - diffs.sort(key=lambda a: a[0]) - - eligible_mask = None - - pnv_data = None - for diff_value, habitat_code in diffs: - if diff_value == 0 or np.isnan(diff_value): - continue + removals = [x for x in diffs if x[0] < 0] + additions = [x for x in diffs if x[0] > 0] + + pnv_data = None # lazy PNV load as it's expensive + for diff_value, habitat_code in removals: + if pnv_data is None: + pnv_data = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) + remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map) + + # If there's no additions we don't need to make the eligible_mask, and we can go + # home early. + if not additions: + return lcc_data_map - if diff_value < 0: - if pnv_data is None: - pnv_data = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height) - remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map) - else: - if eligible_mask is None: - # This mask has to be generated after we've done all removals, before - # we start doing additions, so relies on the sort we do above. - # - # Find areas we can put the new data. This is anywhere we don't already - # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.) - # We know that there should be no partial cells involving crop/pasture at this stage - # because of the balancing we did initially. - excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES - eligible_mask = np.ones_like(lcc_data_map[CROP_CODE], dtype=bool) - for excluded_code in excluded_codes: - if excluded_code in lcc_data_map: - eligible_mask &= (lcc_data_map[excluded_code] == 0) - - add_land_cover(eligible_mask, habitat_code, diff_value, lcc_data_map) + # Find areas we can put the new data. This is anywhere we don't already + # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.) + # We know that there should be no partial cells involving crop/pasture at this stage + # because of the balancing we did initially. + excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES + eligible_mask = np.ones_like(lcc_data_map[CROP_CODE], dtype=bool) + for excluded_code in excluded_codes: + if excluded_code in lcc_data_map: + eligible_mask &= (lcc_data_map[excluded_code] == 0) + + # There is a risk that the total is not achievable as there is a disagreement between the combined + # GAEZ/HYDE, Jung, and our PRESERVE_CODES list (that say don't covert urban or rocky land to farmland). + # As such we need to adjust for what is actually achievable before we make changes. + available_cells = eligible_mask.sum() / eligible_mask.size + total_desired_change = sum(x[0] for x in additions) + if total_desired_change > available_cells: + # Split what is actually possible proportionally to the original demand + additions = [ + ((change / total_desired_change) * available_cells, klass) + for (change, klass) in additions + ] + + for diff_value, habitat_code in additions: + add_land_cover(eligible_mask, habitat_code, diff_value, lcc_data_map) return lcc_data_map + def process_tile_concurrently( current_lvl1_path: Path, pnv_path: Path, diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 7b65266..7d3c870 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -6,7 +6,7 @@ import yirgacheffe as yg from prepare_layers.make_food_current_map import balance_crop_and_pasture_differences, \ - CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover, TileInfo, process_tile + CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover, TileInfo, process_tile, PRESERVE_CODES @pytest.mark.parametrize( [ @@ -320,9 +320,91 @@ def test_process_tile(crop_diff: float, pasture_diff: float, expected_totals: di updated_maps = process_tile(lcc_maps, pnv_map, tile) for key, expected_value in expected_totals.items(): - print(key, np.sum(updated_maps[key])) + layer = updated_maps[key] + layer_total = np.sum(layer) + assert layer_total >= 0 + assert math.isclose(layer_total, expected_value, rel_tol=0.000001), f"Failed for {key}" + + +@pytest.mark.parametrize(["crop_diff", "pasture_diff", "expected_totals"], [ + P(0.25, 0.25, {1: 25, CROP_CODE: 25, PASTURE_CODE: 25}, id="no change"), + P(0.0, 0.0, {1: 75, CROP_CODE: 0, PASTURE_CODE: 0}, id="remove both"), + P(0.25, 0.0, {1: 50, CROP_CODE: 25, PASTURE_CODE: 0}, id="remove pasture, leave crop"), + P(0.0, 0.25, {1: 50, CROP_CODE: 0, PASTURE_CODE: 25}, id="remove crop, leave pasture"), + P(0.5, 0.25, {1: 0, CROP_CODE: 50, PASTURE_CODE: 25}, id="add crop, leave pasture"), + P(0.25, 0.5, {1: 0, CROP_CODE: 25, PASTURE_CODE: 50}, id="leave crop, add pasture"), + P(0.5, 0.2, {1: 5, CROP_CODE: 50, PASTURE_CODE: 20}, id="add crop, remove pasture, more add than remove"), + P(0.2, 0.5, {1: 5, CROP_CODE: 20, PASTURE_CODE: 50}, id="remove crop, add pasture, more add than remove"), + P(0.3, 0.1, {1: 35, CROP_CODE: 30, PASTURE_CODE: 10}, id="add crop, remove pasture, more remove than add"), + P(0.1, 0.3, {1: 35, CROP_CODE: 10, PASTURE_CODE: 30}, id="remove crop, add pasture, more remove than add"), + P(0.3, 0.3, {1: 15, CROP_CODE: 30, PASTURE_CODE: 30}, id="all both, but not total"), + P(0.5, 0.5, {1: 0, CROP_CODE: 37.5, PASTURE_CODE: 37.5}, id="all both (unobtainable)"), + P(0.0, 0.5, {1: 25, CROP_CODE: 0, PASTURE_CODE: 50}, id="replace crop with pasture"), + P(0.5, 0.0, {1: 25, CROP_CODE: 50, PASTURE_CODE: 0}, id="replace pasture with crop"), + P(0.0, 1.0, {1: 0, CROP_CODE: 0, PASTURE_CODE: 75}, id="All pasture, all the time"), + P(1.0, 0.0, {1: 0, CROP_CODE: 75, PASTURE_CODE: 0}, id="All crop, all the time"), + P(0.6, 0.2, {1: 0, CROP_CODE: 55, PASTURE_CODE: 20}, id="Less pasture, but too much crop demand"), + P(0.2, 0.6, {1: 0, CROP_CODE: 20, PASTURE_CODE: 55}, id="Less crop, but too much pasture demand"), +]) +def test_process_tile_with_preserve_codes( + crop_diff: float, + pasture_diff: float, + expected_totals: dict[int, float] +) -> None: + # One quarter neither, one quarter crop, one quarter pasture, one quarter preserved + data = np.ones((5, 5)) + raw_lcc_data_map = { + 1: np.pad(data, ((0, 5), (0, 5))), + CROP_CODE: np.pad(data, ((0, 5), (5, 0))), + PASTURE_CODE: np.pad(data, ((5, 0), (0, 5))), + PRESERVE_CODES[0]: np.pad(data, ((5, 0), (5, 0))), + } + projection = yg.MapProjection("epsg:4326", 1.0, -1.0) + lcc_maps = {lcc: yg.from_array(x, (0, 0), projection) for lcc, x in raw_lcc_data_map.items()} + + # The pnv is just class 1, so that is always the category that will increase + pnv_data = np.ones((10, 10)) # All class 1 + pnv_map = yg.from_array(pnv_data, (0, 0), projection) + + tile = TileInfo( + x_position=0, + y_position=0, + width=10, + height=10, + crop_target=crop_diff, + pasture_target=pasture_diff, + ) + + updated_maps = process_tile(lcc_maps, pnv_map, tile) + + # This quarter of the tile should never be modified + expected_totals[PRESERVE_CODES[0]] = 25 for key, expected_value in expected_totals.items(): layer = updated_maps[key] layer_total = np.sum(layer) + assert layer_total >= 0 assert math.isclose(layer_total, expected_value, rel_tol=0.000001), f"Failed for {key}" + + +def test_ensure_no_negative_values() -> None: + # This test case is something that was seen in real world use: although the math + # generally should prevent us going negative, it does happen due to rounding. This + # test case tries to force a situation where we'd get a negative value, and we + # want to ensure it's clipped to zero instead. + shape = (4,) + eligible_mask = np.array([True, True, False, False]) + + crop = np.zeros(shape) + other = np.array([0.05, 0.05, 0.8, 0.8]) + + lcc_data_map = { + CROP_CODE: crop, + 1: other, + } + + add_land_cover(eligible_mask, CROP_CODE, 0.1, lcc_data_map) + + for lcc, lcc_data in lcc_data_map.items(): + print(lcc_data) + assert (lcc_data >= 0).all() From 14ef0064dd3ee7e7b8fe76bea0dd8266b4453790 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 29 Apr 2026 08:58:53 +0100 Subject: [PATCH 22/27] Make the add_land_cover flow a little more obvious --- prepare_layers/make_food_current_map.py | 30 ++++++++++++------------- tests/test_food_layer.py | 11 ++++----- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 5d6ca36..483476a 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -113,29 +113,31 @@ def remove_land_cover( def add_land_cover( eligible_mask: np.ndarray, - lcc_code: int, - diff: float, + diffs: list[tuple[float, int]], lcc_data_map: dict[int,np.ndarray], ) -> None: - assert diff >= 0 - - agri_raster = lcc_data_map[lcc_code] + # Calculate capacity eligible_count = eligible_mask.sum() if eligible_count == 0: return - - # Calculate capacity total_cells = eligible_mask.size eligible_fraction = eligible_count / total_cells - if eligible_fraction == 0: return - per_cell_addition = diff / eligible_fraction - per_cell_addition = min(per_cell_addition, 1.0) + total_addition = 0 + for diff, lcc_code in diffs: + assert 0 <= diff <= 1 + + agri_raster = lcc_data_map[lcc_code] + + per_cell_addition = diff / eligible_fraction + per_cell_addition = min(per_cell_addition, 1.0) + total_addition += per_cell_addition + + agri_raster[eligible_mask] += per_cell_addition - agri_raster[eligible_mask] += per_cell_addition # Remove from the other land cover classes. This assumes that coming into this # stage the LCC pixels are: # * only non-zero in a single layer @@ -143,10 +145,9 @@ def add_land_cover( for lcc, lcc_data in lcc_data_map.items(): if lcc in [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES: continue - lcc_data[eligible_mask & (lcc_data > 0)] -= per_cell_addition + lcc_data[eligible_mask & (lcc_data > 0)] -= total_addition lcc_data[eligible_mask] = np.maximum(lcc_data[eligible_mask], 0.0) - def process_tile( current_maps: dict[int,yg.YirgacheffeLayer], pnv: yg.YirgacheffeLayer, @@ -226,8 +227,7 @@ def process_tile( for (change, klass) in additions ] - for diff_value, habitat_code in additions: - add_land_cover(eligible_mask, habitat_code, diff_value, lcc_data_map) + add_land_cover(eligible_mask, additions, lcc_data_map) return lcc_data_map diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 7d3c870..42a4c97 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -231,8 +231,7 @@ def test_add_land_simple( add_land_cover( np.ones((10, 10), dtype=bool), - CROP_CODE, - crop_diff, + [(crop_diff, CROP_CODE)], lcc_data_map, ) @@ -260,8 +259,7 @@ def test_add_land_avoid_excluded( add_land_cover( np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(bool), - CROP_CODE, - crop_diff, + [(crop_diff, CROP_CODE)], lcc_data_map, ) @@ -403,8 +401,7 @@ def test_ensure_no_negative_values() -> None: 1: other, } - add_land_cover(eligible_mask, CROP_CODE, 0.1, lcc_data_map) + add_land_cover(eligible_mask, [(0.1, CROP_CODE)], lcc_data_map) - for lcc, lcc_data in lcc_data_map.items(): - print(lcc_data) + for lcc_data in lcc_data_map.values(): assert (lcc_data >= 0).all() From dac39011e36e2e2d23ba8d13777901936fda919c Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 30 Apr 2026 09:23:52 +0100 Subject: [PATCH 23/27] Simplify snakemake rules --- workflow/rules/aoh.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk index ce7f99c..72a8951 100644 --- a/workflow/rules/aoh.smk +++ b/workflow/rules/aoh.smk @@ -112,7 +112,7 @@ For pnv AOHs: uses historic/ era species data. # ============================================================================= -checkpoint aggregate_aohs_per_taxa: +rule aggregate_aohs_per_taxa: """ Checkpoint that ensures all AOHs for a taxa/scenario are generated. Creates a sentinel file when complete. From c5e0eca8f78370f7d14400608e2e92a6a51e400f Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 15 May 2026 09:16:26 +0100 Subject: [PATCH 24/27] Remember to tap when warping --- workflow/rules/habitat.smk | 1 + workflow/rules/prepare.smk | 2 ++ workflow/rules/scenario.smk | 2 ++ 3 files changed, 5 insertions(+) diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index d01540a..c30c77c 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -285,6 +285,7 @@ PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. -t_srs EPSG:4326 \ -tr {params.pixel_scale} -{params.pixel_scale} \ -r average \ + -tap \ -multi \ -co COMPRESS=LZW \ -co NUM_THREADS={threads} \ diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index e69be79..ec186a9 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -149,6 +149,7 @@ Precious: only runs if output doesn't exist. -t_srs EPSG:4326 \ -tr {params.pixel_scale} -{params.pixel_scale} \ -r max \ + -tap \ -co COMPRESS=LZW \ -wo NUM_THREADS={threads} \ {input.elevation} \ @@ -177,6 +178,7 @@ Precious: only runs if output doesn't exist. -t_srs EPSG:4326 \ -tr {params.pixel_scale} -{params.pixel_scale} \ -r min \ + -tap \ -co COMPRESS=LZW \ -wo NUM_THREADS={threads} \ {input.elevation} \ diff --git a/workflow/rules/scenario.smk b/workflow/rules/scenario.smk index efd92b9..096958b 100644 --- a/workflow/rules/scenario.smk +++ b/workflow/rules/scenario.smk @@ -71,6 +71,7 @@ PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. -t_srs EPSG:4326 \ -tr {params.pixel_scale} -{params.pixel_scale} \ -r average \ + -tap \ -multi \ -co COMPRESS=LZW \ -co NUM_THREADS={threads} \ @@ -171,6 +172,7 @@ PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. -t_srs EPSG:4326 \ -tr {params.pixel_scale} -{params.pixel_scale} \ -r average \ + -tap \ -multi \ -co COMPRESS=LZW \ -co NUM_THREADS={threads} \ From 2eb31b6add61c99dc032c991235a9f56136144dd Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 19 May 2026 20:17:52 +0100 Subject: [PATCH 25/27] Add additional restore scenarios and land cover analysis --- config/config.yaml | 8 +- prepare_layers/make_arable_map.py | 26 +++- prepare_layers/make_pasture_map.py | 36 ++--- .../make_restore_agriculture_map.py | 116 --------------- prepare_layers/make_restore_all_map.py | 121 --------------- prepare_layers/make_restore_map.py | 45 ++++-- utils/land_cover_area.py | 80 ++++++++++ workflow/Snakefile | 10 ++ workflow/rules/aoh.smk | 33 +++++ workflow/rules/habitat.smk | 25 ++++ workflow/rules/scenario.smk | 140 +++++++----------- 11 files changed, 270 insertions(+), 370 deletions(-) delete mode 100644 prepare_layers/make_restore_agriculture_map.py delete mode 100644 prepare_layers/make_restore_all_map.py create mode 100644 utils/land_cover_area.py diff --git a/config/config.yaml b/config/config.yaml index c260e71..159e003 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -15,10 +15,16 @@ pixel_scale: 0.016666666666667 hyde_projection: 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' hyde_pixel_scale: 0.08333333333333333 -# Scenarios to run +# Scenarios to run - accepted values are +# - arable +# - restore +# - restore_all +# - restore_agriculture scenarios: - arable - restore + - restore_all + - restore_agriculture # Z-curve value for delta P calculation curve: "0.25" diff --git a/prepare_layers/make_arable_map.py b/prepare_layers/make_arable_map.py index 20aa6da..0b3531a 100644 --- a/prepare_layers/make_arable_map.py +++ b/prepare_layers/make_arable_map.py @@ -1,9 +1,11 @@ import argparse +import math import os import shutil +from contextlib import nullcontext from pathlib import Path -from typing import Optional +import psutil import yirgacheffe as yg from alive_progress import alive_bar @@ -13,7 +15,7 @@ def make_arable_map( current_dir_path: Path, output_path: Path, - concurrency: Optional[int], + parallelism: int | None, show_progress: bool, ) -> None: os.makedirs(output_path, exist_ok=True) @@ -24,12 +26,22 @@ def make_arable_map( shutil.copy(urban_filename, output_path) with yg.read_raster(urban_filename) as urban: + new_arable = 1.0 - urban - if show_progress: - with alive_bar(manual=True) as bar: - new_arable.to_geotiff(new_arable_filename, callback=bar, parallelism=concurrency) - else: - new_arable.to_geotiff(new_arable_filename, parallelism=concurrency) + + if parallelism is not None: + # If we use all the cores on bigger machines we'll run out of memory + # as Yirgacheffe isn't that smart yet unfortunately + mem = psutil.virtual_memory() + estimated_memory_per_row = (urban.window.xsize * 8) * 2 + estimated_rows_per_free_memory = mem.free / estimated_memory_per_row + estimated_chunk_size = estimated_rows_per_free_memory / parallelism + + new_arable.ystep = min(math.floor(estimated_chunk_size), yg.constants.YSTEP) + + ctx = alive_bar(manual=True) if show_progress else nullcontext() + with ctx as bar: + new_arable.to_geotiff(new_arable_filename, callback=bar, parallelism=parallelism) def main() -> None: parser = argparse.ArgumentParser(description="Generate the arable scenario map.") diff --git a/prepare_layers/make_pasture_map.py b/prepare_layers/make_pasture_map.py index 5585b5c..994fd15 100644 --- a/prepare_layers/make_pasture_map.py +++ b/prepare_layers/make_pasture_map.py @@ -1,37 +1,33 @@ import argparse +import os +import shutil +from contextlib import nullcontext from pathlib import Path -from typing import Optional -import yirgacheffe.operators as yo +import yirgacheffe as yg from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) JUNG_PASTURE_CODE = 1402 JUNG_URBAN_CODE = 1405 def make_pasture_map( - current_path: Path, + current_dir_path: Path, output_path: Path, - concurrency: Optional[int], + parallelism: int | None, show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: + os.makedirs(output_path, exist_ok=True) - arable_map = yo.where(current != JUNG_URBAN_CODE, JUNG_PASTURE_CODE, JUNG_URBAN_CODE) + # In this scenario all land that isn't urban is covered to arable + urban_filename = current_dir_path / f"lcc_{JUNG_URBAN_CODE}.tif" + new_pasture_filename = output_path / f"lcc_{JUNG_PASTURE_CODE}.tif" - with RasterLayer.empty_raster_layer_like( - arable_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - arable_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - arable_map.parallel_save(result, parallelism=concurrency) + shutil.copy(urban_filename, output_path) + with yg.read_raster(urban_filename) as urban: + new_pasture = 1.0 - urban + ctx = alive_bar(manual=True) if show_progress else nullcontext() + with ctx as bar: + new_pasture.to_geotiff(new_pasture_filename, callback=bar, parallelism=parallelism) def main() -> None: parser = argparse.ArgumentParser(description="Generate the pasture scenario map.") diff --git a/prepare_layers/make_restore_agriculture_map.py b/prepare_layers/make_restore_agriculture_map.py deleted file mode 100644 index 686b8d7..0000000 --- a/prepare_layers/make_restore_agriculture_map.py +++ /dev/null @@ -1,116 +0,0 @@ -import argparse -import itertools -from pathlib import Path -from typing import Dict, List, Optional - -import pandas as pd -import yirgacheffe.operators as yo -from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) - -# From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV -IUCN_CODE_REPLACEMENTS = [ - "14.1", - "14.2", -] - -def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: - rawdata = pd.read_csv(table_file_name) - result: Dict[str,List[int]] = {} - for _, row in rawdata.iterrows(): - try: - result[row.code].append(int(row.value)) - except KeyError: - result[row.code] = [int(row.value)] - return result - - -def make_restore_map( - pnv_path: Path, - current_path: Path, - crosswalk_path: Path, - output_path: Path, - concurrency: Optional[int], - show_progress: bool, -) -> None: - with RasterLayer.layer_from_file(current_path) as current: - with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: - crosswalk = load_crosswalk_table(crosswalk_path) - - map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) - restore_map = yo.where(current.isin(map_replacement_codes), pnv, current) - - with RasterLayer.empty_raster_layer_like( - restore_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - restore_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - restore_map.parallel_save(result, parallelism=concurrency) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Zenodo resource downloader.") - parser.add_argument( - '--pnv', - type=Path, - help='Path of PNV map', - required=True, - dest='pnv_path', - ) - parser.add_argument( - '--current', - type=Path, - help='Path of current map', - required=True, - dest='current_path', - ) - parser.add_argument( - '--crosswalk', - type=Path, - help='Path of map to IUCN crosswalk table', - required=True, - dest='crosswalk_path', - ) - parser.add_argument( - '--output', - type=Path, - help='Path where final map should be stored', - required=True, - dest='results_path', - ) - parser.add_argument( - '-j', - type=int, - help='Number of concurrent threads to use for calculation.', - required=False, - default=None, - dest='concurrency', - ) - parser.add_argument( - '-p', - help="Show progress indicator", - default=False, - required=False, - action='store_true', - dest='show_progress', - ) - args = parser.parse_args() - - make_restore_map( - args.pnv_path, - args.current_path, - args.crosswalk_path, - args.results_path, - args.concurrency, - args.show_progress, - ) - -if __name__ == "__main__": - main() diff --git a/prepare_layers/make_restore_all_map.py b/prepare_layers/make_restore_all_map.py deleted file mode 100644 index 5918418..0000000 --- a/prepare_layers/make_restore_all_map.py +++ /dev/null @@ -1,121 +0,0 @@ -import argparse -import itertools -from pathlib import Path -from typing import Dict, List, Optional - -import pandas as pd -import yirgacheffe.operators as yo -from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) - -# From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV -IUCN_CODE_REPLACEMENTS = [ - "14", - "14.1", - "14.2", - "14.3", - "14.4", - "14.5", - "14.6" -] - -def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: - rawdata = pd.read_csv(table_file_name) - result: Dict[str,List[int]] = {} - for _, row in rawdata.iterrows(): - try: - result[row.code].append(int(row.value)) - except KeyError: - result[row.code] = [int(row.value)] - return result - - -def make_restore_map( - pnv_path: Path, - current_path: Path, - crosswalk_path: Path, - output_path: Path, - concurrency: Optional[int], - show_progress: bool, -) -> None: - with RasterLayer.layer_from_file(current_path) as current: - with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: - crosswalk = load_crosswalk_table(crosswalk_path) - - map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) - restore_map = yo.where(current.isin(map_replacement_codes), pnv, current) - - with RasterLayer.empty_raster_layer_like( - restore_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - restore_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - restore_map.parallel_save(result, parallelism=concurrency) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Zenodo resource downloader.") - parser.add_argument( - '--pnv', - type=Path, - help='Path of PNV map', - required=True, - dest='pnv_path', - ) - parser.add_argument( - '--current', - type=Path, - help='Path of current map', - required=True, - dest='current_path', - ) - parser.add_argument( - '--crosswalk', - type=Path, - help='Path of map to IUCN crosswalk table', - required=True, - dest='crosswalk_path', - ) - parser.add_argument( - '--output', - type=Path, - help='Path where final map should be stored', - required=True, - dest='results_path', - ) - parser.add_argument( - '-j', - type=int, - help='Number of concurrent threads to use for calculation.', - required=False, - default=None, - dest='concurrency', - ) - parser.add_argument( - '-p', - help="Show progress indicator", - default=False, - required=False, - action='store_true', - dest='show_progress', - ) - args = parser.parse_args() - - make_restore_map( - args.pnv_path, - args.current_path, - args.crosswalk_path, - args.results_path, - args.concurrency, - args.show_progress, - ) - -if __name__ == "__main__": - main() diff --git a/prepare_layers/make_restore_map.py b/prepare_layers/make_restore_map.py index a273b5f..46b9fb8 100644 --- a/prepare_layers/make_restore_map.py +++ b/prepare_layers/make_restore_map.py @@ -1,26 +1,19 @@ import argparse import itertools +import math import os from contextlib import ExitStack, nullcontext from pathlib import Path -from typing import Dict, List, Optional import pandas as pd +import psutil import yirgacheffe as yg from alive_progress import alive_bar -# From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV -IUCN_CODE_REPLACEMENTS = [ - "14.1", - "14.2", - "14.3", - "14.4", - "14.6" -] -def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: +def load_crosswalk_table(table_file_name: Path) -> dict[str,list[int]]: rawdata = pd.read_csv(table_file_name) - result: Dict[str,List[int]] = {} + result: dict[str,list[int]] = {} for _, row in rawdata.iterrows(): try: result[row.code].append(int(row.value)) @@ -33,14 +26,15 @@ def make_restore_map( current_dir_path: Path, crosswalk_path: Path, output_path: Path, - parallelism: Optional[int], + iucn_codes: list[str], + parallelism: int | None, show_progress: bool, ) -> None: os.makedirs(output_path, exist_ok=True) crosswalk = load_crosswalk_table(crosswalk_path) - map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) + map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in iucn_codes])) ideal_map_replacement_filenames = [current_dir_path / f"lcc_{code}.tif" for code in map_replacement_codes] map_replacement_filenames = [path for path in ideal_map_replacement_filenames if path.exists()] @@ -61,8 +55,21 @@ def make_restore_map( ctx = alive_bar(manual=True, title=str(lcc_code)) if show_progress else nullcontext() with ctx as bar: with yg.read_raster(filename) as layer: + + if parallelism is not None: + # If we use all the cores on bigger machines we'll run out of memory + # as Yirgacheffe isn't that smart yet unfortunately + mem = psutil.virtual_memory() + estimated_memory_per_row = (layer.window.xsize * 8) * 2 + estimated_rows_per_free_memory = mem.free / estimated_memory_per_row + estimated_chunk_size = estimated_rows_per_free_memory / parallelism + updated_layer = layer + (replacement_total * (pnv == lcc_code).astype(yg.DataType.Float32)) capped_updated_layer = yg.where(updated_layer > 1, 1.0, updated_layer) + + if parallelism is not None: + capped_updated_layer.ystep = min(math.floor(estimated_chunk_size), yg.constants.YSTEP) + capped_updated_layer.to_geotiff( output_path / f"lcc_{lcc_code}.tif", callback=bar, @@ -70,7 +77,7 @@ def make_restore_map( ) def main() -> None: - parser = argparse.ArgumentParser(description="Zenodo resource downloader.") + parser = argparse.ArgumentParser(description="Generate restore scenario counterfactual maps.") parser.add_argument( '--pnv', type=Path, @@ -95,10 +102,17 @@ def main() -> None: parser.add_argument( '--output', type=Path, - help='Path where final map should be stored', + help='Path where final maps should be stored', required=True, dest='results_path', ) + parser.add_argument( + '--codes', + type=str, + help='Comma-separated IUCN habitat codes to restore (e.g. 14.1,14.2,14.3)', + required=True, + dest='codes', + ) parser.add_argument( '-j', type=int, @@ -122,6 +136,7 @@ def main() -> None: args.current_dir_path, args.crosswalk_path, args.results_path, + args.codes.split(','), args.parallelism, args.show_progress, ) diff --git a/utils/land_cover_area.py b/utils/land_cover_area.py new file mode 100644 index 0000000..448bd01 --- /dev/null +++ b/utils/land_cover_area.py @@ -0,0 +1,80 @@ +import argparse +import os +from pathlib import Path + +import pandas as pd +import yirgacheffe as yg +from snakemake_argparse_bridge import snakemake_compatible # type: ignore + +def sum_dir(directory: Path) -> dict[int,float]: + result: dict[int,float] = {} + for raster_path in directory.glob("lcc_*.tif"): + class_num = int(raster_path.stem.split("_")[1]) + with ( + yg.read_raster(raster_path) as raster, + yg.area_raster(raster.map_projection) as area_raster, + ): + result[class_num] = (raster * area_raster).parallel_sum() + return result + +def land_cover_area( + jung_current_dir: Path, + current_dir: Path, + output_filename: Path, +) -> None: + os.makedirs(output_filename.parent, exist_ok=True) + + jung_current_areas = sum_dir(jung_current_dir) + current_areas = sum_dir(current_dir) + + all_classes = sorted(set(jung_current_areas) | set(current_areas)) + rows = [ + [cls, jung_current_areas.get(cls, 0.0), current_areas.get(cls, 0.0)] + for cls in all_classes + ] + + df = pd.DataFrame(rows, columns=["land_cover_class", "jung current", "hybrid current"]) + df.to_csv(output_filename, index=False) + + +@snakemake_compatible(mapping={ + "jung_current_dir": "params.jung_current_dir", + "current_dir": "params.current_dir", + "output_filename": "output[0]", +}) +def main() -> None: + parser = argparse.ArgumentParser( + description="Compare land cover areas between jung_current and current maps." + ) + parser.add_argument( + "--jung-current", + type=Path, + required=True, + dest="jung_current_dir", + help="Directory of jung_current lcc_*.tif rasters", + ) + parser.add_argument( + "--current", + type=Path, + required=True, + dest="current_dir", + help="Directory of current lcc_*.tif rasters", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + dest="output_filename", + help="Destination CSV path", + ) + args = parser.parse_args() + + land_cover_area( + args.jung_current_dir, + args.current_dir, + args.output_filename, + ) + + +if __name__ == "__main__": + main() diff --git a/workflow/Snakefile b/workflow/Snakefile index b179ea9..92a279e 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -185,6 +185,16 @@ rule prepare: DATADIR / "habitat_layers" / "pnv" / ".sentinel", DATADIR / "elevation-max.tif", DATADIR / "elevation-min.tif", + DATADIR / "land_cover_area.csv", + + +rule footprints: + """Target: compute footprint of humanity for all counterfactual scenarios.""" + input: + expand( + str(DATADIR / "footprint" / "{scenario}.csv"), + scenario=SCENARIOS, + ), rule validation: diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk index 72a8951..e0c21c5 100644 --- a/workflow/rules/aoh.smk +++ b/workflow/rules/aoh.smk @@ -164,3 +164,36 @@ Used by validation (current scenario) and for downstream analysis. --output {output.collated} \ 2>&1 | tee {log} """ + + +# ============================================================================= +# Footprint of Humanity (per scenario) +# ============================================================================= + + +rule footprint_of_humanity: + """ +Compute the footprint of humanity metric for a given scenario. + +Compares current AOH data against PNV and the scenario CSVs. +""" + input: + pnv_csv=DATADIR / "aohs" / "pnv.csv", + scenario_csv=DATADIR / "aohs" / "{scenario}.csv", + current_csv=DATADIR / "aohs" / "current.csv", + output: + DATADIR / "footprint" / "{scenario}.csv", + log: + DATADIR / "logs" / "footprint_{scenario}.log", + wildcard_constraints: + scenario="|".join(COUNTERFACTUAL_SCENARIOS), + shell: + """ + mkdir -p $(dirname {output}) + python3 {SRCDIR}/utils/footprint_of_humanity.py \ + --current {input.current_csv} \ + --pnv {input.pnv_csv} \ + --scenario {input.scenario_csv} \ + --output {output} \ + 2>&1 | tee {log} + """ diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index c30c77c..d47aed6 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -303,6 +303,31 @@ PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. # ============================================================================= +rule land_cover_area: + """ +Compare total land cover area between jung_current and current maps. +Produces a CSV with land cover class as rows and source map as columns. +""" + input: + jung_current_sentinel=DATADIR / "100m" / "jung_current" / ".sentinel", + current_sentinel=DATADIR / "100m" / "current" / ".sentinel", + output: + DATADIR / "land_cover_area.csv", + log: + DATADIR / "logs" / "land_cover_area.log", + params: + jung_current_dir=DATADIR / "100m" / "jung_current", + current_dir=DATADIR / "100m" / "current", + shell: + """ + python3 {SRCDIR}/utils/land_cover_area.py \ + --jung-current {params.jung_current_dir} \ + --current {params.current_dir} \ + --output {output} \ + 2>&1 | tee {log} + """ + + rule pnv_processed: """ Process the PNV map into per-class fractional rasters at the target pixel scale diff --git a/workflow/rules/scenario.smk b/workflow/rules/scenario.smk index 096958b..a923c23 100644 --- a/workflow/rules/scenario.smk +++ b/workflow/rules/scenario.smk @@ -4,7 +4,7 @@ # Handles generating scenario-specific habitat maps and difference maps: # # - Arable: all non-urban land converted to arable -# - Restore: agricultural land restored to PNV +# - Restore variants: agricultural/pastoral land restored to PNV # # Each scenario: # 1. Generate 100m scenario map from current map @@ -15,6 +15,16 @@ import os from pathlib import Path +# IUCN codes to restore for each restore scenario variant +RESTORE_SCENARIOS = { + "restore": "14.1,14.2,14.3,14.4,14.6", + "restore_all": "14,14.1,14.2,14.3,14.4,14.5,14.6", + "restore_agriculture": "14.1,14.2", +} + +COUNTERFACTUAL_SCENARIOS = ["arable"] + list(RESTORE_SCENARIOS.keys()) + + # ============================================================================= # Arable scenario # ============================================================================= @@ -40,128 +50,75 @@ All non-urban land is converted to arable. python3 {SRCDIR}/prepare_layers/make_arable_map.py \ --current {params.current_dir} \ --output {params.output_dir} \ - -j 2 \ - 2>&1 | tee {log} - touch {output.sentinel} - """ - - -rule warp_arable: - """ -Warp the arable scenario map from 100m to the target pixel scale. -PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. -""" - input: - sentinel=ancient(DATADIR / "100m" / "arable" / ".sentinel"), - output: - sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", - log: - DATADIR / "logs" / "warp_arable.log", - threads: workflow.cores - params: - input_dir=DATADIR / "100m" / "arable", - output_dir=DATADIR / "habitat_layers" / "arable", - pixel_scale=config["pixel_scale"], - shell: - """ - mkdir -p {params.output_dir} - for d in {params.input_dir}/*.tif; do - basename=$(basename "$d") - gdalwarp \ - -t_srs EPSG:4326 \ - -tr {params.pixel_scale} -{params.pixel_scale} \ - -r average \ - -tap \ - -multi \ - -co COMPRESS=LZW \ - -co NUM_THREADS={threads} \ - -wo NUM_THREADS={threads} \ - "$d" \ - {params.output_dir}/"$basename" \ - 2>&1 - done 2>&1 | tee {log} - touch {output.sentinel} - """ - - -rule diff_map_arable: - """ -Generate the area difference map between current and arable habitat layers. -Used by the delta P scaling step. -""" - input: - current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", - arable_sentinel=DATADIR / "habitat_layers" / "arable" / ".sentinel", - output: - DATADIR / "habitat" / "arable_diff_area.tif", - log: - DATADIR / "logs" / "diff_map_arable.log", - threads: workflow.cores - params: - current_dir=DATADIR / "habitat_layers" / "current", - arable_dir=DATADIR / "habitat_layers" / "arable", - shell: - """ - mkdir -p $(dirname {output}) - python3 {SRCDIR}/prepare_layers/make_diff_map.py \ - --current {params.current_dir} \ - --scenario {params.arable_dir} \ - --output {output} \ -j {threads} \ + -p \ 2>&1 | tee {log} + touch {output.sentinel} """ # ============================================================================= -# Restore scenario +# Restore scenario variants # ============================================================================= -rule make_restore_map: +rule make_restore_scenario: """ -Generate the restore scenario map at 100m resolution. -Agricultural land is restored to PNV habitat. +Generate a restore scenario map at 100m resolution. +The IUCN habitat codes to restore are passed via --codes. """ input: current_sentinel=DATADIR / "100m" / "current" / ".sentinel", pnv=DATADIR / "habitat" / "pnv_raw.tif", crosswalk=DATADIR / "crosswalk.csv", output: - sentinel=DATADIR / "100m" / "restore" / ".sentinel", + sentinel=DATADIR / "100m" / "{scenario}" / ".sentinel", log: - DATADIR / "logs" / "make_restore_map.log", + DATADIR / "logs" / "make_{scenario}_map.log", + wildcard_constraints: + scenario="|".join(RESTORE_SCENARIOS.keys()), threads: workflow.cores params: current_dir=DATADIR / "100m" / "current", - output_dir=DATADIR / "100m" / "restore", + output_dir=lambda wc: DATADIR / "100m" / wc.scenario, + codes=lambda wc: RESTORE_SCENARIOS[wc.scenario], shell: """ python3 {SRCDIR}/prepare_layers/make_restore_map.py \ --pnv {input.pnv} \ --current {params.current_dir} \ --crosswalk {input.crosswalk} \ + --codes {params.codes} \ --output {params.output_dir} \ - -j 2 \ + -j {threads} \ + -p \ 2>&1 | tee {log} touch {output.sentinel} """ -rule warp_restore: +# ============================================================================= +# Warp and diff rules (shared across all counterfactual scenarios) +# ============================================================================= + + +rule warp_scenario: """ -Warp the restore scenario map from 100m to the target pixel scale. +Warp a scenario map from 100m to the target pixel scale. PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. """ input: - sentinel=ancient(DATADIR / "100m" / "restore" / ".sentinel"), + sentinel=ancient(DATADIR / "100m" / "{scenario}" / ".sentinel"), output: - sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", + sentinel=DATADIR / "habitat_layers" / "{scenario}" / ".sentinel", log: - DATADIR / "logs" / "warp_restore.log", + DATADIR / "logs" / "warp_{scenario}.log", + wildcard_constraints: + scenario="|".join(COUNTERFACTUAL_SCENARIOS), threads: workflow.cores params: - input_dir=DATADIR / "100m" / "restore", - output_dir=DATADIR / "habitat_layers" / "restore", + input_dir=lambda wc: DATADIR / "100m" / wc.scenario, + output_dir=lambda wc: DATADIR / "habitat_layers" / wc.scenario, pixel_scale=config["pixel_scale"], shell: """ @@ -185,27 +142,30 @@ PRECIOUS: Only rebuilds if the sentinel is explicitly deleted. """ -rule diff_map_restore: +rule diff_map_scenario: """ -Generate the area difference map between current and restore habitat layers. +Generate the area difference map between current and a scenario's habitat layers. +Used by the delta P scaling step. """ input: current_sentinel=DATADIR / "habitat_layers" / "current" / ".sentinel", - restore_sentinel=DATADIR / "habitat_layers" / "restore" / ".sentinel", + scenario_sentinel=DATADIR / "habitat_layers" / "{scenario}" / ".sentinel", output: - DATADIR / "habitat" / "restore_diff_area.tif", + DATADIR / "habitat" / "{scenario}_diff_area.tif", log: - DATADIR / "logs" / "diff_map_restore.log", + DATADIR / "logs" / "diff_map_{scenario}.log", + wildcard_constraints: + scenario="|".join(COUNTERFACTUAL_SCENARIOS), threads: workflow.cores params: current_dir=DATADIR / "habitat_layers" / "current", - restore_dir=DATADIR / "habitat_layers" / "restore", + scenario_dir=lambda wc: DATADIR / "habitat_layers" / wc.scenario, shell: """ mkdir -p $(dirname {output}) python3 {SRCDIR}/prepare_layers/make_diff_map.py \ --current {params.current_dir} \ - --scenario {params.restore_dir} \ + --scenario {params.scenario_dir} \ --output {output} \ -j {threads} \ 2>&1 | tee {log} From 1e1a44e211bb20ddef199d4a546390fe7ece157e Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 4 Jun 2026 07:59:33 +0100 Subject: [PATCH 26/27] Fix diff map logic --- deltap/delta_p_scaled.py | 52 ++++++++++++++++----------------- prepare_layers/make_diff_map.py | 7 +++-- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/deltap/delta_p_scaled.py b/deltap/delta_p_scaled.py index 3925e93..efb2a97 100644 --- a/deltap/delta_p_scaled.py +++ b/deltap/delta_p_scaled.py @@ -25,37 +25,37 @@ def delta_p_scaled_area( species_total_counts = pd.read_csv(species_totals_path) - diff_area = yg.read_raster(diff_area_map_path) - diff_area_rescaled = yg.where(diff_area < SCALE, float('nan'), diff_area / SCALE) + with yg.read_raster(diff_area_map_path) as diff_area: + diff_area_rescaled = yg.where(diff_area < SCALE, float('nan'), diff_area / SCALE) - # Process all species in total - total_species_count = int(species_total_counts[species_total_counts.taxa=="all"]["count"].values[0]) - summed_layer = yg.sum(per_taxa) - all_scaled_filtered_layer = yg.where( - diff_area_rescaled != 0, - ((summed_layer / diff_area_rescaled) * -1.0) / total_species_count, - float('nan') - ) - - bands = [all_scaled_filtered_layer] - labels = ["all"] - - # Now per taxa - for inlayer in per_taxa: - # get the taxa from the filename - _, name = os.path.split(inlayer.name) - taxa = name[:-4] - labels.append(taxa) - - taxa_species_count = int(species_total_counts[species_total_counts.taxa==taxa]["count"].values[0]) - scaled_filtered_layer = yg.where( + # Process all species in total + total_species_count = int(species_total_counts[species_total_counts.taxa=="all"]["count"].values[0]) + summed_layer = yg.sum(per_taxa) + all_scaled_filtered_layer = yg.where( diff_area_rescaled != 0, - ((inlayer / diff_area_rescaled) * -1.0) / taxa_species_count, + ((summed_layer / diff_area_rescaled) * -1.0) / total_species_count, float('nan') ) - bands.append(scaled_filtered_layer) - yg.to_geotiff(output_path, bands, labels, nodata=float('nan')) + bands = [all_scaled_filtered_layer] + labels = ["all"] + + # Now per taxa + for inlayer in per_taxa: + # get the taxa from the filename + _, name = os.path.split(inlayer.name) + taxa = name[:-4] + labels.append(taxa) + + taxa_species_count = int(species_total_counts[species_total_counts.taxa==taxa]["count"].values[0]) + scaled_filtered_layer = yg.where( + diff_area_rescaled != 0, + ((inlayer / diff_area_rescaled) * -1.0) / taxa_species_count, + float('nan') + ) + bands.append(scaled_filtered_layer) + + yg.to_geotiff(output_path, bands, labels, nodata=float('nan')) @snakemake_compatible(mapping={ "input_path": "params.input_dir", diff --git a/prepare_layers/make_diff_map.py b/prepare_layers/make_diff_map.py index e9fe7c6..6b967af 100644 --- a/prepare_layers/make_diff_map.py +++ b/prepare_layers/make_diff_map.py @@ -28,10 +28,11 @@ def make_diff_map( else yg.constant(0.0) scenario_layer = yg.read_raster(scenario_habitat_filename) if scenario_habitat_filename.exists() \ else yg.constant(0.0) - habitat_diff = current_layer != scenario_layer - layers.append(habitat_diff) + habitat_diff = current_layer - scenario_layer + from_current = yg.where(habitat_diff > 0, habitat_diff, 0) + layers.append(from_current) - diff = yg.any(layers) + diff = yg.sum(layers) area = yg.area_raster(diff.map_projection) scaled_diff = diff * area From 5a466d3a983180ac6febe33272a5c0a871a4e4eb Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 4 Jun 2026 08:05:54 +0100 Subject: [PATCH 27/27] Updated snakefmt --- workflow/rules/habitat.smk | 4 ++-- workflow/rules/prepare.smk | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk index d47aed6..b9eb054 100644 --- a/workflow/rules/habitat.smk +++ b/workflow/rules/habitat.smk @@ -133,7 +133,7 @@ enough to align with the GAEZ data. shell: """ sed "s/0.0833333/0.08333333333333333/" \ - {input.raw_hyde_raster} > {output.modified_hyde_raster} + {input.raw_hyde_raster} >{output.modified_hyde_raster} """ @@ -148,7 +148,7 @@ the rest of the workflow can mix it with projected raster data. projection=config["hyde_projection"], shell: """ - echo '{params.projection}' > {output.hyde_projection_file} + echo '{params.projection}' >{output.hyde_projection_file} """ diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index ec186a9..1761510 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -46,10 +46,10 @@ Fetch the Jung L2 habitat map from Zenodo. """ mkdir -p $(dirname {output.habitat}) reclaimer zenodo --zenodo_id {params.zenodo_id} \ - --filename "{params.filename}" \ - --extract \ - --output {output.habitat} \ - 2>&1 | tee {log} + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} """ @@ -69,10 +69,10 @@ Fetch the Jung habitat map update masks from Zenodo. """ mkdir -p {params.habitat_dir} reclaimer zenodo --zenodo_id {params.zenodo_id} \ - --filename "{params.filename}" \ - --extract \ - --output {params.habitat_dir} \ - 2>&1 | tee {log} + --filename "{params.filename}" \ + --extract \ + --output {params.habitat_dir} \ + 2>&1 | tee {log} touch {output.sentinel} """ @@ -97,10 +97,10 @@ Fetch the Jung PNV map from Zenodo. """ mkdir -p $(dirname {output.habitat}) reclaimer zenodo --zenodo_id {params.zenodo_id} \ - --filename "{params.filename}" \ - --extract \ - --output {output.habitat} \ - 2>&1 | tee {log} + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} """ @@ -123,9 +123,9 @@ Download raw elevation DEM from Zenodo. shell: """ reclaimer zenodo --zenodo_id {params.zenodo_id} \ - --filename "{params.filename}" \ - --output {output.elevation} \ - 2>&1 | tee {log} + --filename "{params.filename}" \ + --output {output.elevation} \ + 2>&1 | tee {log} """