From 708ebe0010692d42796407ee94d54db94081f0d3 Mon Sep 17 00:00:00 2001 From: rehsani Date: Mon, 4 May 2026 09:58:45 -0700 Subject: [PATCH] Add soil module (SoilGrids 2.0) with NEH 630 Ch7 hydrologic-soil-group derivation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit floodpath.soil supplies the SOIL half of the SCS-CN runoff parameterisation. Pair with floodpath.landuse to derive a Curve Number raster. - floodpath/soil/soilgrids.py: get_soilgrids_texture(lat, lon, buffer_deg, depths) — fetches sand/silt/clay (% by mass) at user-selectable depth slices from the public ISRIC bucket via /vsicurl/ + WarpedVRT (native Goode Homolosine -> WGS84). Default depths=(0-5cm, 5-15cm, 15-30cm), depth-weighted to a 0-30 cm topsoil composition (closest available proxy for NEH 630 Ch7 Table 7-1's 0-50 cm reference depth). Fetches parallelised across (variable, depth) pairs — ~73 s for 9 reads vs ~525 s sequential. SoilGrids nodata (-32768) masked to NaN. - floodpath/soil/utils.py: usda_texture_class(sand, silt, clay) — full USDA Soil Survey Manual 1993 / Soil Taxonomy 1999 texture triangle with all 12 canonical classes (sand, loamy sand, sandy loam, loam, silt loam, silt, sandy clay loam, clay loam, silty clay loam, sandy clay, silty clay, clay). Vectorised over numpy arrays. - floodpath/soil/hsg.py: texture_to_hsg(TextureGrid) -> HSGGrid via the USDA_TEXTURE_TO_HSG mapping (NEH 630 Ch7 §630.0701 textural shortcut). Custom mappings supported for site-calibrated work. - TextureGrid carries closure_residual() for sand+silt+clay~=100 sanity checks; HSGGrid carries class_counts(), fraction(), dominant_class(), as_letter_grid() helpers and a uint8 1-4 encoding (0=nodata). - Tests: 60 total — 12 USDA centroid round-trips, 11 hsg unit cases, 3 model property tests, plus a committed Robit Bata fixture (8.4 KB, 33x33 multiband float32 GeoTIFF) with pinned per-cell stats and a 100% HSG-D outcome (Vertisol-rich Ethiopian highlands). One live ISRIC fetch regression test guards the fixture; an integration test in Djibouti exercises the full /vsicurl/+WarpedVRT+parallel-fetch path. --- README.md | 3 +- floodpath/soil/__init__.py | 32 ++++ floodpath/soil/constants.py | 122 ++++++++++++ floodpath/soil/hsg.py | 72 +++++++ floodpath/soil/models.py | 100 ++++++++++ floodpath/soil/soilgrids.py | 198 ++++++++++++++++++++ floodpath/soil/utils.py | 128 +++++++++++++ tests/conftest.py | 35 ++++ tests/fixtures/_generate_robit_bata_soil.py | 79 ++++++++ tests/fixtures/robit_bata_soil.tif | Bin 0 -> 8570 bytes tests/soil/__init__.py | 0 tests/soil/test_constants.py | 55 ++++++ tests/soil/test_fixture_regression.py | 44 +++++ tests/soil/test_hsg.py | 102 ++++++++++ tests/soil/test_models.py | 91 +++++++++ tests/soil/test_soilgrids.py | 119 ++++++++++++ tests/soil/test_utils.py | 98 ++++++++++ 17 files changed, 1277 insertions(+), 1 deletion(-) create mode 100644 floodpath/soil/__init__.py create mode 100644 floodpath/soil/constants.py create mode 100644 floodpath/soil/hsg.py create mode 100644 floodpath/soil/models.py create mode 100644 floodpath/soil/soilgrids.py create mode 100644 floodpath/soil/utils.py create mode 100644 tests/fixtures/_generate_robit_bata_soil.py create mode 100644 tests/fixtures/robit_bata_soil.tif create mode 100644 tests/soil/__init__.py create mode 100644 tests/soil/test_constants.py create mode 100644 tests/soil/test_fixture_regression.py create mode 100644 tests/soil/test_hsg.py create mode 100644 tests/soil/test_models.py create mode 100644 tests/soil/test_soilgrids.py create mode 100644 tests/soil/test_utils.py diff --git a/README.md b/README.md index c57a883..e3fe758 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,8 @@ print(f"Total damaged built-up: {damage.values.sum():,.0f} m²") | `floodpath.dem` | Copernicus GLO-30 (AWS Open Data, COG) | Elevation patch around any (lat, lon) | | `floodpath.hydrology` | derived from DEM via `pyflwdir` | Flow direction + accumulation, stream networks (with Strahler order), basin delineation, HAND | | `floodpath.exposure` | GHSL R2023A, WorldPop, OpenStreetMap (Overpass) | Built-up surface, population, building footprints | -| `floodpath.landuse` | ESA WorldCover (10 m, AWS Open Data, COG) | 11-class land-cover raster (2020 v100, 2021 v200) | +| `floodpath.landuse` | ESA WorldCover (10 m, AWS Open Data, COG) | 11-class land-cover raster (2020 v100, 2021 v200), Manning's roughness derivation | +| `floodpath.soil` | ISRIC SoilGrids 2.0 (250 m, COG) | Sand/silt/clay topsoil composition + USDA texture-triangle classification + NEH 630 Ch7 hydrologic soil group (A/B/C/D) | | `floodpath.damage` | JRC Huizinga 2017 + DEM/HAND/GHSL | Per-cell flood depth and damage in m² of built-up surface | ## Depth-damage curves diff --git a/floodpath/soil/__init__.py b/floodpath/soil/__init__.py new file mode 100644 index 0000000..ddb135a --- /dev/null +++ b/floodpath/soil/__init__.py @@ -0,0 +1,32 @@ +"""Soil module: hydrologic soil properties from SoilGrids 2.0. + +Provides the SOIL half of the SCS-CN runoff parameterisation. Pair with +`floodpath.landuse` to derive a Curve Number raster. +""" + +from .constants import ( + HSG_CLASSES, + HSG_TO_INT, + INT_TO_HSG, + SOILGRIDS_DEFAULT_DEPTHS, + USDA_TEXTURE_CLASSES, + USDA_TEXTURE_TO_HSG, +) +from .hsg import texture_to_hsg +from .models import HSGGrid, TextureGrid +from .soilgrids import get_soilgrids_texture +from .utils import usda_texture_class + +__all__ = [ + "HSGGrid", + "HSG_CLASSES", + "HSG_TO_INT", + "INT_TO_HSG", + "SOILGRIDS_DEFAULT_DEPTHS", + "TextureGrid", + "USDA_TEXTURE_CLASSES", + "USDA_TEXTURE_TO_HSG", + "get_soilgrids_texture", + "texture_to_hsg", + "usda_texture_class", +] diff --git a/floodpath/soil/constants.py b/floodpath/soil/constants.py new file mode 100644 index 0000000..7b097af --- /dev/null +++ b/floodpath/soil/constants.py @@ -0,0 +1,122 @@ +"""Constants for the soil module.""" + +from types import MappingProxyType + +# --- ISRIC SoilGrids 2.0 (250 m global, COG via WebDAV) -------------------- +# +# VRT files reference the underlying COG tiles in Goode Homolosine; GDAL's +# /vsicurl/ + a WarpedVRT projects them to WGS84 on the fly. The bucket +# honours HTTP range requests, so a window read is cheap. + +SOILGRIDS_BASE_URL: str = "https://files.isric.org/soilgrids/latest/data" +SOILGRIDS_NATIVE_CRS: str = ( + 'PROJCRS["unknown",BASEGEOGCRS["unknown",DATUM["unknown",' + 'ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],' + 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]]],' + 'CONVERSION["unknown",METHOD["Interrupted Goode Homolosine"],' + 'PARAMETER["Longitude of natural origin",0,ANGLEUNIT["degree",0.0174532925199433]],' + 'PARAMETER["False easting",0,LENGTHUNIT["metre",1]],' + 'PARAMETER["False northing",0,LENGTHUNIT["metre",1]]],' + 'CS[Cartesian,2],AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1]],' + 'AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1]]]' +) +SOILGRIDS_TARGET_CRS: str = "EPSG:4326" + +# SoilGrids stores sand/silt/clay as integer g/kg (0-1000). Multiply by 0.1 +# to get percent by mass (0-100). +SOILGRIDS_TEXTURE_SCALE: float = 0.1 + +# SoilGrids nodata sentinel (int16). Cells with this value have no +# prediction (oceans, ice, glaciers, no-cover land). Must be masked +# *before* applying the scale factor or it pollutes downstream stats. +SOILGRIDS_NODATA: int = -32768 + +# Standard depth slices (in cm) published by SoilGrids 2.0. +SOILGRIDS_DEPTHS: tuple[str, ...] = ( + "0-5cm", + "5-15cm", + "15-30cm", + "30-60cm", + "60-100cm", + "100-200cm", +) +# Default depth-weighted topsoil window (0-30 cm). Closest available proxy +# for the 0-50 cm reference depth used in NEH 630 Ch7 Table 7-1. +SOILGRIDS_DEFAULT_DEPTHS: tuple[str, ...] = ("0-5cm", "5-15cm", "15-30cm") +# Width in cm of each depth slice — used as weights when averaging. +SOILGRIDS_DEPTH_WIDTHS_CM: MappingProxyType = MappingProxyType({ + "0-5cm": 5, + "5-15cm": 10, + "15-30cm": 15, + "30-60cm": 30, + "60-100cm": 40, + "100-200cm": 100, +}) + +SOILGRIDS_BUFFER_DEG: float = 0.1 +SOILGRIDS_TEXTURE_UNITS: str = "% by mass" + + +# --- USDA Soil Texture Triangle (12 classes) ------------------------------- +# +# Sand / silt / clay in % by mass. Boundaries follow the Soil Survey +# Manual (1993) and Soil Taxonomy 2nd ed. (1999). + +USDA_TEXTURE_CLASSES: tuple[str, ...] = ( + "sand", + "loamy_sand", + "sandy_loam", + "loam", + "silt_loam", + "silt", + "sandy_clay_loam", + "clay_loam", + "silty_clay_loam", + "sandy_clay", + "silty_clay", + "clay", +) + + +# --- USDA Texture class -> NEH 630 Ch7 Hydrologic Soil Group --------------- +# +# Source: NRCS National Engineering Handbook Part 630 Chapter 7 +# (Hydrologic Soil Groups, January 2009), §630.0701 textural criteria +# in conjunction with §630.0701 prose ("typical" assignment for soils +# with no shallow impermeable layer or high water table). +# +# §630.0701 group definitions: +# A <10% clay AND >90% sand or gravel; gravel/sand textures. +# B 10-20% clay AND 50-90% sand; loamy sand or sandy loam textures. +# C 20-40% clay AND <50% sand; loam, silt loam, sandy clay loam, +# clay loam, silty clay loam. +# D >40% clay AND <50% sand; clayey textures (clay, silty clay, +# sandy clay). +# +# A handful of texture classes straddle the prose ranges; we follow the +# most-cited consensus mapping used in SWAT, HEC-HMS, and the Kalyanapu / +# Yu / et-al. global SCS-CN literature. + +USDA_TEXTURE_TO_HSG: MappingProxyType = MappingProxyType({ + "sand": "A", + "loamy_sand": "A", + "sandy_loam": "B", + "loam": "B", + "silt_loam": "B", + "silt": "B", + "sandy_clay_loam": "C", + "clay_loam": "C", + "silty_clay_loam": "C", + "sandy_clay": "D", + "silty_clay": "D", + "clay": "D", +}) + +HSG_CLASSES: tuple[str, ...] = ("A", "B", "C", "D") + +# Categorical encoding for raster storage. 0 = nodata. +HSG_TO_INT: MappingProxyType = MappingProxyType({"A": 1, "B": 2, "C": 3, "D": 4}) +INT_TO_HSG: MappingProxyType = MappingProxyType({v: k for k, v in HSG_TO_INT.items()}) + +HSG_NODATA: int = 0 +HSG_UNITS: str = "NEH 630 Ch7 hydrologic soil group (1=A, 2=B, 3=C, 4=D)" diff --git a/floodpath/soil/hsg.py b/floodpath/soil/hsg.py new file mode 100644 index 0000000..7bde351 --- /dev/null +++ b/floodpath/soil/hsg.py @@ -0,0 +1,72 @@ +"""Map a TextureGrid to a Hydrologic Soil Group (HSG) raster. + +Per USDA NRCS NEH Part 630 Chapter 7 §630.0701 (January 2009): each cell's +USDA texture class is looked up in `USDA_TEXTURE_TO_HSG` to assign A/B/C/D. +This is the textural shortcut for the full Table 7-1 criteria (which would +require Ksat, depth-to-impermeable-layer, and water-table data we do not +have globally). + +The shortcut assumes "deep" soils with no shallow restrictive layer and +no high water table — appropriate for global SCS-CN runoff modelling at +~250 m. Users with site-calibrated Ksat data should compute HSG directly +from Table 7-1. +""" + +import numpy as np + +from .constants import ( + HSG_NODATA, + HSG_TO_INT, + USDA_TEXTURE_TO_HSG, +) +from .models import HSGGrid, TextureGrid +from .utils import usda_texture_class + + +def texture_to_hsg( + texture: TextureGrid, + mapping: dict[str, str] | None = None, +) -> HSGGrid: + """Convert a soil texture grid to a hydrologic soil group raster. + + Args: + texture: Per-cell sand/silt/clay percentages (TextureGrid). + mapping: Optional override of the USDA-class -> HSG-letter table. + Defaults to USDA_TEXTURE_TO_HSG (NEH 630 Ch7 §630.0701). + + Returns: + HSGGrid with int-encoded HSG classes (1=A, 2=B, 3=C, 4=D, 0=nodata). + """ + table = dict(USDA_TEXTURE_TO_HSG) if mapping is None else dict(mapping) + + # Cells with NaN in any texture component (SoilGrids nodata) cannot be + # classified — keep them at HSG_NODATA. Pass safe placeholders to the + # classifier and mask the output afterwards. + sand = np.where(np.isnan(texture.sand), 0.0, texture.sand) + silt = np.where(np.isnan(texture.silt), 0.0, texture.silt) + clay = np.where(np.isnan(texture.clay), 0.0, texture.clay) + nodata_mask = ( + np.isnan(texture.sand) + | np.isnan(texture.silt) + | np.isnan(texture.clay) + ) + + classes = usda_texture_class(sand, silt, clay) + # `classes` is an object array of texture-class names; map each unique + # class to its HSG int and apply via fancy indexing. + hsg_int = np.full(classes.shape, HSG_NODATA, dtype=np.uint8) + for cls_name, letter in table.items(): + if letter not in HSG_TO_INT: + raise ValueError( + f"mapping value {letter!r} is not a valid HSG letter" + ) + hsg_int[classes == cls_name] = HSG_TO_INT[letter] + hsg_int[nodata_mask] = HSG_NODATA + + return HSGGrid( + values=hsg_int, + transform=texture.transform, + crs=texture.crs, + bbox=texture.bbox, + depths=texture.depths, + ) diff --git a/floodpath/soil/models.py b/floodpath/soil/models.py new file mode 100644 index 0000000..2e2125e --- /dev/null +++ b/floodpath/soil/models.py @@ -0,0 +1,100 @@ +"""Dataclasses for the soil module.""" + +from dataclasses import dataclass + +import numpy as np +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox + +from .constants import ( + HSG_CLASSES, + HSG_TO_INT, + INT_TO_HSG, + SOILGRIDS_TEXTURE_UNITS, +) + + +@dataclass +class TextureGrid: + """Per-cell soil texture composition (sand / silt / clay) as percentages. + + Each of `sand`, `silt`, `clay` is a float32 array in `% by mass`. They + should sum to ~100 at every cell (SoilGrids predictions can be off by + 1-2 due to independent regression — the texture-triangle classifier is + robust to that). + + `depths` records which SoilGrids depth slice(s) the values represent; + when more than one was averaged, the list reflects the depth-weighted + composition. + """ + + sand: np.ndarray + silt: np.ndarray + clay: np.ndarray + transform: Affine + crs: str + bbox: BoundingBox + depths: tuple[str, ...] + units: str = SOILGRIDS_TEXTURE_UNITS + + @property + def shape(self) -> tuple[int, int]: + """Return the (rows, cols) shape of the underlying rasters.""" + return self.sand.shape # type: ignore[return-value] + + def closure_residual(self) -> np.ndarray: + """Return per-cell |sand+silt+clay - 100|, useful for diagnostics.""" + return np.abs(self.sand + self.silt + self.clay - 100.0) + + +@dataclass +class HSGGrid: + """Per-cell NEH 630 Ch7 hydrologic soil group (A/B/C/D). + + Stored as an integer raster with the encoding in HSG_TO_INT (1=A, 2=B, + 3=C, 4=D, 0=nodata). The `epoch`-equivalent for soil is depth — recorded + in `depths` so consumers can introspect the assumption. + """ + + values: np.ndarray + transform: Affine + crs: str + bbox: BoundingBox + depths: tuple[str, ...] + units: str = "NEH 630 Ch7 HSG (1=A, 2=B, 3=C, 4=D)" + + @property + def shape(self) -> tuple[int, int]: + """Return the (rows, cols) shape of the underlying raster.""" + return self.values.shape # type: ignore[return-value] + + def class_counts(self) -> dict[str, int]: + """Count cells per HSG letter present in the grid (excludes nodata).""" + out: dict[str, int] = {} + for letter in HSG_CLASSES: + n = int(np.count_nonzero(self.values == HSG_TO_INT[letter])) + if n > 0: + out[letter] = n + return out + + def fraction(self, letter: str) -> float: + """Return the fraction of cells in the given HSG letter.""" + if self.values.size == 0: + return 0.0 + n = int(np.count_nonzero(self.values == HSG_TO_INT[letter])) + return float(n / self.values.size) + + def dominant_class(self) -> str: + """Return the HSG letter with the most cells; raises if grid is empty.""" + counts = self.class_counts() + if not counts: + raise ValueError("HSGGrid contains no classified cells") + return max(counts, key=counts.get) # type: ignore[arg-type] + + def as_letter_grid(self) -> np.ndarray: + """Return a US1-string array with HSG letters; nodata becomes empty string.""" + out = np.empty(self.values.shape, dtype="U1") + for code, letter in INT_TO_HSG.items(): + out[self.values == code] = letter + return out diff --git a/floodpath/soil/soilgrids.py b/floodpath/soil/soilgrids.py new file mode 100644 index 0000000..2beb2d6 --- /dev/null +++ b/floodpath/soil/soilgrids.py @@ -0,0 +1,198 @@ +"""Fetch ISRIC SoilGrids 2.0 soil-property patches via /vsicurl/.""" + +from concurrent.futures import ThreadPoolExecutor + +import numpy as np +import rasterio +from rasterio.enums import Resampling +from rasterio.env import Env +from rasterio.vrt import WarpedVRT +from rasterio.windows import from_bounds +from rasterio.windows import transform as window_transform + +from floodpath.dem.constants import GDAL_VSICURL_ENV +from floodpath.dem.models import BoundingBox +from floodpath.dem.utils import bbox_from_point + +from .constants import ( + SOILGRIDS_BASE_URL, + SOILGRIDS_BUFFER_DEG, + SOILGRIDS_DEFAULT_DEPTHS, + SOILGRIDS_DEPTH_WIDTHS_CM, + SOILGRIDS_DEPTHS, + SOILGRIDS_NODATA, + SOILGRIDS_TARGET_CRS, + SOILGRIDS_TEXTURE_SCALE, +) +from .models import TextureGrid + + +def soilgrids_vrt_url( + variable: str, + depth: str, + statistic: str = "mean", +) -> str: + """Build the public SoilGrids 2.0 VRT URL. + + Args: + variable: One of `sand`, `silt`, `clay`, `bdod`, `cec`, ... + depth: One of SOILGRIDS_DEPTHS, e.g. `5-15cm`. + statistic: Defaults to `mean`; also `Q0.05`, `Q0.5`, `Q0.95`, + `uncertainty`. + + Raises: + ValueError: `depth` is not one of the published slices. + """ + if depth not in SOILGRIDS_DEPTHS: + raise ValueError( + f"depth must be one of {SOILGRIDS_DEPTHS}, got {depth!r}" + ) + return ( + f"{SOILGRIDS_BASE_URL}/{variable}/" + f"{variable}_{depth}_{statistic}.vrt" + ) + + +def _read_warped_window( + url: str, + bbox: BoundingBox, +) -> tuple[np.ndarray, "rasterio.transform.Affine"]: + """Open a SoilGrids VRT, warp to WGS84, return the bbox window + transform. + + SoilGrids native CRS is Interrupted Goode Homolosine. We let GDAL + decide the destination resolution from the source pixel size projected + to WGS84 — the ~250 m native pixels become roughly 0.0022° at the + equator, so a 0.075° patch resolves to ~33 cells. + + Returned data is the raw int16 raster including the SoilGrids nodata + sentinel (-32768); callers are responsible for masking it. + """ + with rasterio.open(f"/vsicurl/{url}") as src: + with WarpedVRT( + src, + crs=SOILGRIDS_TARGET_CRS, + resampling=Resampling.nearest, + ) as vrt: + win = from_bounds(*bbox.bounds, transform=vrt.transform) + # WarpedVRT does not allow boundless reads; SoilGrids is global + # so any sane bbox is fully inside the source extent. + data = vrt.read(1, window=win) + clipped_transform = window_transform(win, vrt.transform) + return data, clipped_transform + + +def _scale_texture(raw: np.ndarray) -> np.ndarray: + """Apply the SoilGrids texture scale, masking nodata to NaN.""" + out = raw.astype(np.float32) * SOILGRIDS_TEXTURE_SCALE + out[raw == SOILGRIDS_NODATA] = np.nan + return out + + +def _fetch_var_at_depth( + bbox: BoundingBox, + variable: str, + depth: str, +) -> tuple[str, str, np.ndarray, "rasterio.transform.Affine"]: + """Fetch a single (variable, depth) combination. Returns scaled %.""" + url = soilgrids_vrt_url(variable, depth) + raw, transform = _read_warped_window(url, bbox) + return variable, depth, _scale_texture(raw), transform + + +def get_soilgrids_texture( + lat: float, + lon: float, + buffer_deg: float = SOILGRIDS_BUFFER_DEG, + depths: tuple[str, ...] = SOILGRIDS_DEFAULT_DEPTHS, +) -> TextureGrid: + """Return depth-weighted soil texture (sand/silt/clay %) around a point. + + The fetcher uses GDAL's `/vsicurl/` driver against the public ISRIC + bucket (`files.isric.org/soilgrids/latest/data/`) and a `WarpedVRT` + to reproject from SoilGrids' native Goode Homolosine to WGS84. + Range-read so payload is on the order of a few hundred KB per call. + + For each depth slice in `depths`, three rasters are fetched + (sand / silt / clay) — all in parallel via a thread pool, since these + are I/O-bound. They are then averaged with weights equal to each depth's + thickness in centimetres so the result represents the bulk topsoil + composition over the chosen interval. Nodata cells (oceans, glaciers) + are masked to NaN. + + Args: + lat: Latitude of the region center, decimal degrees. + lon: Longitude of the region center, decimal degrees. + buffer_deg: Half-width of the square bbox around the point, degrees. + depths: Subset of SOILGRIDS_DEPTHS to average. Default is + (`0-5cm`, `5-15cm`, `15-30cm`) — the 0-30 cm topsoil window, + the closest available proxy for the 0-50 cm reference depth + used in NEH 630 Ch7 Table 7-1. + + Returns: + TextureGrid with sand/silt/clay arrays in % by mass, NaN where + SoilGrids has no prediction. + + Raises: + ValueError: `depths` is empty, or any element is not a published + SoilGrids slice. + """ + if not depths: + raise ValueError("depths must contain at least one slice") + for d in depths: + if d not in SOILGRIDS_DEPTHS: + raise ValueError( + f"depth {d!r} not in published SoilGrids depths " + f"{SOILGRIDS_DEPTHS}" + ) + + bbox = bbox_from_point(lat, lon, buffer_deg) + + # Fan out all (variable, depth) pairs to a thread pool. ISRIC tolerates + # ~10 concurrent connections per client; with 3-9 fetches per call we + # stay well below that. + jobs = [(var, depth) for depth in depths for var in ("sand", "silt", "clay")] + + with Env(**GDAL_VSICURL_ENV): + with ThreadPoolExecutor(max_workers=min(len(jobs), 9)) as pool: + futures = [ + pool.submit(_fetch_var_at_depth, bbox, var, depth) + for var, depth in jobs + ] + results = [f.result() for f in futures] + + # results is a list of (variable, depth, scaled, transform). + # Group by variable, depth-weighted average. + transform = results[0][3] + by_var: dict[str, dict[str, np.ndarray]] = {"sand": {}, "silt": {}, "clay": {}} + for var, depth, scaled, _ in results: + by_var[var][depth] = scaled + + weight_total = float(sum(SOILGRIDS_DEPTH_WIDTHS_CM[d] for d in depths)) + avg: dict[str, np.ndarray] = {} + for var, depth_to_arr in by_var.items(): + # Use np.nansum so masked nodata cells stay NaN only where every + # depth was nodata; partially-defined columns get a partial average. + weighted = np.zeros_like(next(iter(depth_to_arr.values()))) + weight_seen = np.zeros_like(weighted) + for d in depths: + arr = depth_to_arr[d] + w = float(SOILGRIDS_DEPTH_WIDTHS_CM[d]) + valid = ~np.isnan(arr) + weighted[valid] += w * arr[valid] + weight_seen[valid] += w + with np.errstate(invalid="ignore", divide="ignore"): + avg[var] = np.where( + weight_seen > 0, + weighted / weight_seen, + np.nan, + ) + + return TextureGrid( + sand=avg["sand"].astype(np.float32), + silt=avg["silt"].astype(np.float32), + clay=avg["clay"].astype(np.float32), + transform=transform, + crs=SOILGRIDS_TARGET_CRS, + bbox=BoundingBox(*bbox.bounds), + depths=tuple(depths), + ) diff --git a/floodpath/soil/utils.py b/floodpath/soil/utils.py new file mode 100644 index 0000000..16c0e7c --- /dev/null +++ b/floodpath/soil/utils.py @@ -0,0 +1,128 @@ +"""USDA Soil Texture Triangle classification (12 classes). + +The boundaries follow the USDA Soil Survey Manual (1993) and Soil Taxonomy +(1999). Each cell of input (sand%, silt%, clay%) maps to exactly one of +the 12 classes in `USDA_TEXTURE_CLASSES`. Inputs need not sum to exactly +100 — silt is computed implicitly so small (~1%) closure errors from +SoilGrids regressions don't affect the classification. + +Implementation note: the order of clauses matters because the prose +boundaries overlap at endpoints; the resolution below matches the +canonical online USDA Texture Calculator +(https://www.nrcs.usda.gov/resources/education-and-teaching-materials/soil-texture-calculator). +""" + +from __future__ import annotations + +import numpy as np + + +def _ensure_array(x: float | np.ndarray) -> np.ndarray: + """Convert scalar or array input to a float64 numpy array.""" + return np.asarray(x, dtype=np.float64) + + +def usda_texture_class( + sand: float | np.ndarray, + silt: float | np.ndarray, + clay: float | np.ndarray, +) -> str | np.ndarray: + """Classify (sand, silt, clay) percentages into a USDA texture class. + + Args: + sand: Sand fraction, % by mass. Scalar or array. + silt: Silt fraction, % by mass. Same shape as `sand`. + clay: Clay fraction, % by mass. Same shape as `sand`. + + Returns: + If scalar inputs: a string from USDA_TEXTURE_CLASSES. + If array inputs: an object array of strings of the same shape. + """ + s = _ensure_array(sand) + si = _ensure_array(silt) + c = _ensure_array(clay) + scalar = s.ndim == 0 + + # Promote scalars to 1-element arrays so the vectorised logic is uniform. + if scalar: + s = s.reshape(1) + si = si.reshape(1) + c = c.reshape(1) + + out = np.empty(s.shape, dtype=object) + out[:] = "loam" # default; should be overwritten by the rules below + + # Order of evaluation: most specific / clay-heavy first, sand-heavy last. + # Using cumulative `done` mask so later rules don't overwrite earlier + # assignments — mirrors the USDA Texture Calculator decision tree. + done = np.zeros(s.shape, dtype=bool) + + def _assign(name: str, mask: np.ndarray) -> None: + m = mask & ~done + out[m] = name + done[m] = True + + # 1. Clay (clay >= 40, sand <= 45, silt < 40). + _assign("clay", (c >= 40) & (s <= 45) & (si < 40)) + + # 2. Silty clay (silt >= 40, clay >= 40). + _assign("silty_clay", (si >= 40) & (c >= 40)) + + # 3. Sandy clay (sand >= 45, clay >= 35). + _assign("sandy_clay", (s >= 45) & (c >= 35)) + + # 4. Clay loam (clay 27-40, sand 20-45). + _assign("clay_loam", (c >= 27) & (c < 40) & (s > 20) & (s <= 45)) + + # 5. Silty clay loam (silt >= 40, clay 27-40). + _assign("silty_clay_loam", (si >= 40) & (c >= 27) & (c < 40)) + + # 6. Sandy clay loam (sand 45-80, clay 20-35, silt < 28). + _assign( + "sandy_clay_loam", + (s > 45) & (c >= 20) & (c < 35) & (si < 28), + ) + + # 7. Loam (clay 7-27, silt 28-50, sand 23-52). + _assign( + "loam", + (c >= 7) & (c < 27) & (si >= 28) & (si < 50) & (s <= 52), + ) + + # 8. Silt (silt >= 80, clay < 12). + _assign("silt", (si >= 80) & (c < 12)) + + # 9. Silt loam (silt >= 50 AND clay 12-27) OR (silt 50-80 AND clay < 12). + _assign( + "silt_loam", + (((si >= 50) & (c >= 12) & (c < 27)) + | ((si >= 50) & (si < 80) & (c < 12))), + ) + + # 10. Sandy loam — three USDA sub-rules combined: + # a) sand 43-52 AND silt < 50 AND clay 7-20 + # b) sand 52-70 AND clay <= 20 AND silt + 2*clay >= 30 + # c) sand 70-85 AND silt + 2*clay >= 30 + sandy_loam_a = (s >= 43) & (s < 52) & (si < 50) & (c >= 7) & (c < 20) + sandy_loam_b = (s >= 52) & (s < 70) & (c <= 20) & (si + 2 * c >= 30) + sandy_loam_c = (s >= 70) & (s < 85) & (si + 2 * c >= 30) + _assign("sandy_loam", sandy_loam_a | sandy_loam_b | sandy_loam_c) + + # 11. Loamy sand (sand 70-90, clay <= 15, silt + 2*clay < 30 + # AND silt + 1.5*clay >= 15). + _assign( + "loamy_sand", + (s >= 70) & (s < 90) & (c <= 15) + & (si + 2 * c < 30) & (si + 1.5 * c >= 15), + ) + + # 12. Sand (sand >= 85, silt + 1.5*clay < 15). + _assign("sand", (s >= 85) & (si + 1.5 * c < 15)) + + # Anything still unclassified is forced to loam (a textural fallback for + # combinations slightly outside any class boundary due to closure errors). + out[~done] = "loam" + + if scalar: + return str(out[0]) + return out diff --git a/tests/conftest.py b/tests/conftest.py index 0748bd6..17fc0a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ from floodpath.hydrology.models import FlowGrid, Hand, StreamNetwork from floodpath.landuse import WORLDCOVER_CLASSES from floodpath.landuse.models import LanduseGrid +from floodpath.soil.models import TextureGrid ROBIT_BATA_STREAM_THRESHOLD: int = 200 ROBIT_BATA_GHSL_FIXTURE: Path = ( @@ -30,6 +31,9 @@ ROBIT_BATA_WORLDCOVER_FIXTURE: Path = ( Path(__file__).resolve().parent / "fixtures" / "robit_bata_worldcover.tif" ) +ROBIT_BATA_SOIL_FIXTURE: Path = ( + Path(__file__).resolve().parent / "fixtures" / "robit_bata_soil.tif" +) ROBIT_BATA_FIXTURE: Path = Path(__file__).resolve().parent / "fixtures" / "robit_bata.tif" @@ -191,6 +195,37 @@ def robit_bata_worldcover() -> LanduseGrid: ) +@pytest.fixture(scope="session") +def robit_bata_soil() -> TextureGrid: + """Load the committed Robit Bata SoilGrids fixture (33x33 cells, ~250 m). + + Multiband float32 GeoTIFF with sand/silt/clay (% by mass) at the 0-30 cm + depth-weighted topsoil composition (default `depths`). Regenerate via + `python tests/fixtures/_generate_robit_bata_soil.py`. + """ + with rasterio.open(ROBIT_BATA_SOIL_FIXTURE) as src: + sand = src.read(1) + silt = src.read(2) + clay = src.read(3) + transform = src.transform + crs = str(src.crs) + bounds = src.bounds + return TextureGrid( + sand=sand, + silt=silt, + clay=clay, + transform=transform, + crs=crs, + bbox=BoundingBox( + min_lon=bounds.left, + min_lat=bounds.bottom, + max_lon=bounds.right, + max_lat=bounds.top, + ), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + @pytest.fixture(scope="session") def ne_brazil_dem() -> DEM: """Live DEM at (-5.0, -39.0) with the default buffer. diff --git a/tests/fixtures/_generate_robit_bata_soil.py b/tests/fixtures/_generate_robit_bata_soil.py new file mode 100644 index 0000000..c146a04 --- /dev/null +++ b/tests/fixtures/_generate_robit_bata_soil.py @@ -0,0 +1,79 @@ +"""Regenerate the Robit Bata SoilGrids 2.0 test fixture. + +Run when ISRIC reprocesses or we move test bboxes: + + python tests/fixtures/_generate_robit_bata_soil.py + +Stores sand / silt / clay (% by mass) as a 3-band float32 GeoTIFF at the +0-30 cm depth-weighted topsoil composition (default `depths`). +""" + +import sys +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parents[2] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +import numpy as np +import rasterio + +from floodpath.soil import get_soilgrids_texture +from floodpath.soil.constants import SOILGRIDS_DEFAULT_DEPTHS + +FIXTURE_PATH: Path = Path(__file__).resolve().parent / "robit_bata_soil.tif" +FIXTURE_LAT: float = 11.805 +FIXTURE_LON: float = 37.5625 +FIXTURE_BUFFER_DEG: float = 0.0375 +FIXTURE_DEPTHS: tuple[str, ...] = SOILGRIDS_DEFAULT_DEPTHS + + +def regenerate() -> None: + """Fetch the live SoilGrids patch and overwrite the on-disk fixture.""" + tex = get_soilgrids_texture( + lat=FIXTURE_LAT, + lon=FIXTURE_LON, + buffer_deg=FIXTURE_BUFFER_DEG, + depths=FIXTURE_DEPTHS, + ) + profile = { + "driver": "GTiff", + "height": tex.shape[0], + "width": tex.shape[1], + "count": 3, + "dtype": "float32", + "crs": tex.crs, + "transform": tex.transform, + "compress": "lzw", + "nodata": float("nan"), + } + with rasterio.open(FIXTURE_PATH, "w", **profile) as dst: + dst.write(tex.sand.astype(np.float32), 1) + dst.write(tex.silt.astype(np.float32), 2) + dst.write(tex.clay.astype(np.float32), 3) + dst.set_band_description(1, "sand") + dst.set_band_description(2, "silt") + dst.set_band_description(3, "clay") + size_kb = FIXTURE_PATH.stat().st_size / 1024 + valid = ~np.isnan(tex.sand) + print( + f"Wrote {FIXTURE_PATH} shape={tex.shape} " + f"depths={tex.depths} size={size_kb:.1f} KB" + ) + print( + f"Sand: {np.nanmean(tex.sand):.2f}% mean " + f"({np.nanmin(tex.sand):.2f}-{np.nanmax(tex.sand):.2f})" + ) + print( + f"Silt: {np.nanmean(tex.silt):.2f}% mean " + f"({np.nanmin(tex.silt):.2f}-{np.nanmax(tex.silt):.2f})" + ) + print( + f"Clay: {np.nanmean(tex.clay):.2f}% mean " + f"({np.nanmin(tex.clay):.2f}-{np.nanmax(tex.clay):.2f})" + ) + print(f"Valid cells: {int(valid.sum())}/{tex.sand.size}") + + +if __name__ == "__main__": + regenerate() diff --git a/tests/fixtures/robit_bata_soil.tif b/tests/fixtures/robit_bata_soil.tif new file mode 100644 index 0000000000000000000000000000000000000000..3a36fa6a4e9ceb9a918cc7789473b055b9d1d227 GIT binary patch literal 8570 zcmb7}c{H0_`}ZS4g9wSJq0w{a&Yg%fC1NP5JrNO!F{V~^IK)s@R8gI@Pecqcl~7fB zln5nNR7W}-jWkt7OLcOzhm;<4q{HcaetOpPto6KW{noqQcki|L_1V|HuJ2lVxc}KV zk4J?eVK5jD2Gi1pX)O|Av1$Dm!xve5(f56^Y5&`=v&g#t<{vp2Ed&g{Xijw#Y&`9QJxK#&S_7YmpI)oE(+B*uxfcf7GK`7;F^^ z22(6Xdzvn;@}KJN&Gzu~k4=w?iAs-Js}BI~y!6;4AUP^2b`8k(3h@l)@k4om0U(eT zm6VdW=%j-{>W&`}6Pp&Dx-BJr+m2*#ZCX@vjJw^B-2W#L`~O{JTVneEhX~_;6^Txa z+WQ|7cf0>{c`%q@x2Z>Vqu}TN*nc+Y|I*PN7E3RSS*mJL3;Q)sxy zdhZax#W52mv?%J;=+9^12;_38(|91x6Hwe^j^L0oR<==0V=Y-e~9(iUb z#8N}8PhobYLKr+YGVU$VQxrp)WQumLzRD#S7w0A==dF23PN?Ai8nOG!wjJ@_rmK#`HB$VsX8)-&q}2yHn7CW2*s zm-YV!8u+Ax+xo#`>FZueYVv{JVz~P8s`P?)Wb(`T7LfTDR}rVP@f zst?gcX|k{-JO(9Of7`@9Kn@G$RFiM(yTC6*78YX_En_n)y}*-hyO-Y@Q=UF`t6@u) zvp=@(fPMdny0fidAL9Dz0NK<{s;@kzSDhfmujnqOGS9UpDI$6rTZ4rwKvK}aAW>($ zS-I@i!_kVuoiK@GTlprJD+7r{=Mrc6B;!GAibd$X&Y*__kG&0QHvR6*-!}lWF1`zq z)#-wXSwB%0I>r*X` z{L`1ng8n?}OscZ-w;7!9B8uLPJT2-y~^WCVQpFVGF>o%h8vgq3N$tSC- zq2Mmk0UFTvSYFU&nN-!LKQdvbSR(U#6tG>?J~5_z>Jgmq7wV?4&Pu=( z`DmRaoyai1EEs1>JoF=`9^y@5Kp?U!oit^>1gvoLCd}%Chj2<9DD<&JIJ@OxI?H=H zt3spMHogz1P6e+Ef5gMG*^h1*Z=K+y=S-~&-$>{Tp;sS0+JETNb~69Tmd8B`J!rnj z=58Ff2oR=3h5k|H^=^d@e;*ZzU=sxD1yot%)}ec$OGUzUQn%cdy4*(?^_G|-gGuMO zdv}`F6;uXpTfN$7x~L*O^7W|1hojSDQv<)s|Eca$rTpy4hCX3U-u%l|z9s9*`WzDq zL`48U+<~Iioz{opft=QElg^0X4Z|H?XxQkH1!z}(uvNkc>N!sPsz+rRJnzpkoOEFl z!-H$gVwgF06R&pvX_4-x($9ngXOqg<&z!fbN=>>xK7Sxdab*ua=v-j#<_0gNXi2i% z<3)%K7SfW!nVY&5;{}$I!)Ue3b2G16zV&u-_kx*PI{v{ZN+wiIhGahagdYF@V z7G%t8Rv$0J{@k!@zmI;(6N7pcf!}vl$nKC5z8Vj0IEP z`aEW)`0TeLWjs53=Sxl#MpkNljQdj=+C}BEx+dC3e$#vpC;ooe5fAwOU&`QXqNV=6v6_{+y4Wec2JeW^vsw=MRR-OLUir9?VP)ZrVQ28RM@`Fr#}zUZ5o&U ze$hf6aE91>l-#K8U}DBE&v zZy&`s!B0l(9R^Cuj&|DW@g=73ePXxlbmqv>0%SF=K2=&e($I- z;R|1>lXuaL(^30t*PB(9^+h6EEZ}Xx!0RgC)F`v0jtD*ej^dFi&*E}j+L*&nUa9K! zkDQy-MAH8y?&Q|B%>mXeH=-<;b&gydd8=f!cJ0PuOSk;J6yN%fYZ< zOD-dimpg8`(ppX{C*2il9VI_Z@IG?7FH(XpUDl9eF)8!VT#gd=dMPy(2(@p}MOYV` zl6F`>SFN|>*V|JmYnLHz@FmNZgIn+t60p_kuE*#bA-Jj?cr&j%dk3wYT{U#P)3e)zf*CT@cfDLIN`#*PMOVyQJO#wc=v4;4I~p7zliX*K4ULc%E}4$q>z096#9dZlR-Eq>LTq<3~k4DWXFC( z5!1oO&QLTn$jbzV{UCHOO^|7Et?Hc&|F*5vd3y!L2>shBeOn`QdtzMl@K^s{+=8gi zA4K`~u=gshbd^vqD>3@cmN0GpEp7ZC8lf)?7CmBVm*`|9sI&0Iqv0i)A`fH1gy9=T zm{u)xyYrd|vV;$OVbE-i+zz~Hqqe&#OU>6b0FM$sT!J^>Z>HJ(CH{8+P64T{1Hrnw>!!OiV!N3j&VKNj{;5a~i@CW?z3WXn zY6ywe+$XiFN%d-~PPq*TP18>;fqLz(>sl90Ib0uaV$Et7!I8|jIQjU;5HmVFYne>w zWPC8pD4D^ZF!er0&+2eCx{7{&mJo!eXj86e6kpExeHQtBkwN|`Yk|9)OxgG8C8M;o zwYC_7F&Ob#Bm9<1qwP>rFa#1Y%1DX-DnrP3sMa)4G|D;#JYsZ3fh~4{piV{RuEDpv zPd|;adMa>QaS&OMOfk&Z4+?*WZ?gf9aT4YU(ala8XtieIz9x~238oiN5(N18c3nHc zZ!1KcSQ3W>uDjc!a3WqMHMbp7qR54r(6y`c#u~zH3xeI*HT7rLbi{&6lBWRWgwC|T zyVf?F@)5U|Y8(UdaEu@;^XrNsma9u7n;}w1-xYJBDR*QMma&FdW2G}j_ZzE39*wln zK^cyuf*wWC=wUPwvWOY~LYJ>;w|yYm^G99&w`;}+aOZrpV?dwaw-T|FjSg8^kIT~a z?{)Afl=}AqKZ`1!W-;Ejcrlo4zxaWOCOWwnfOPQ7PMoo8FiR9IR z`x$R&!v&~*Z#R|l{jIa4v$kf*!%JfxFOyjC)UFvo=QDLr^H^vGUn~hU~od>`@0pdZHQD%rE%?&~`_wehP52 z4yAIl#hz6hb|XHBv~V0bCM;P z4X1K|Q8KyL)J>aYXZMG_@YOyM^i?pc%VIY6LSJMq_AGUyGRr}uUMaBh7a+dw$Yiu2 zK>k--oSW|UTHA_DT1t$qK`k(v34|4uvLe28Jc%%+EKx7UP7v*@zf{gwPLa-m7Nxo~ zs;z6w+5UImzzlqv50jmB!gmR9$@LlQ;j+EcC-_cwY?Wl<)f(W=I`5IP|vjKM&k&6YT_X@;d=Qd_V^zf)CNbD+o3kl)0v)yvP(-v zYvhE``w(u`R_h|;>=y3_ZlrqsHRqndX-*=~<}WgoU43YG{o|CLxL($*Sm4TT#-u3+ z;$Fm2^mNjfZ1$-eLMgA3TjkPC!iR@7<5XOb?r%};~G!WmVLHIj# zFfNkm70z!hsGMkEiwI@>H0l?nl>E?o{T({sVa$nU5U%g9d}C~{!UExBz&FE6X&324 z-!+SenY;h4V3GSb*^1s#^rrojR#vkP`wN7dWoi#_)?cvTCN|l8vga71nj||ODhR23f}wlPR(ELnvUdcQ6c{Ge zZQnC*tx@d*`m0Ga0rMOo)CPJ}soEL|d!1UJ@j;`5?HSCz|JH6pFj0K>Fx)j;ndG3^ zXctKoN9%-W_!F3Gz_plk}0U<@v%Ppv7y}_M=M`QL6$jfZ}MZE@6PE% zXie=}Gbx0!s(+JffBpAhcu6Cu?x}kP{Bn z_}42N@bNO2%OdD^ldl+qKJrkAi8A=0`OqI;b^eXT{x+MouoR(MVIslIy!cDDd~3yo zg1YW=n32UVEMcS4lqJYoa_y3M@dLaghMALXjgqDNru^&zOnvvE#^jV`f%`v?a2rt` zAW4*@vWtCZYq5#pc@Xfns_c!dvbKz{*tA_caj(!TyZ`zXjiGq&UvZOFP&ga8`U79J z@`?ztjZ{mlsYPotAXq7s!LZ1ivRwk#?`npb=bgM!^sd#-e$Z;m%PG6GKe*S%uiX+~ zB1XQ3!pua_!yG@;u(WpgKn+Z?_t}mDRsR*13+KG+na?HAt;_dhA6A7wDT7}v49d4x z2~P&7?P7m-?-2ul5DY@H^o;YL0Ec}VHJGXa5zth=veRI1cO=1TXvVxCw$mfeJm2^F zcnXLqU=~5v`@0G76c27`AA>F$jC0MWo(l@kaRSu8PGOAm8-E7DhITg~U&bl~p4MS; z+$vUi{giGbMjz*+XxhH`Fow#3L+XGJbe#;t$UH(mo9m_CVBkYuXgyaH_!HZF=|S$lO0LW{vP^N`VxOiL_Z-0t}}v9=cZ@;++p1XARfdbv$mAcAt|s^6ZL zJPO^tmRXQ_%!cbwQEW3KJzMO;HExn)Zgy4ea}lI|w!~&L_l?w? z$W9a90|I4`^JVY1xnr{1$X4ocJCgzls-=Qkyyh^x`VlcexyR1ga3r7zR6g&Y1Owe1 zzsTn-@ws%PZtf(3HCO5!^y>G5Ec+!p`;NM?z-h#_`&gRSFxSy9X9XAAeQYJQKbF*i zWjLO))|WYUKt%~DZS4cvma^ZJP+X@iS2}mnM(^AU`Nq29X4yy|Cp|Qz z`b~dF;Mj_*#|$Ni0FPlvm`roFt}{*PhCrks79?Cm*xE1@xUd&+9!M zVr?ibY%h0`;wG&RxouoWw=CEhwZ2VPma|%~r{|Q~$sca-F$_k0AT^72A_=U^0i%SW z;E3bEEh%jXns>UgXH6ZvedopPjE%&N80=@NCO1af6D?}{1v-K2M4YVe4=xy%msJ}Y zH-tWiovh<^7A`-$-uQI_;H%)Cec$}*pab{jj!hU}!m*lS(uJ+e&!@ZI?hEsegafjh z@tr+6#6oZ&hpM4VoS*pyPdFIyDsp9tPft~6D5r0RJ~%|Y%a6Q~HmpYIMY`Lwb-+@!sdT(X}CwOhs5;vjihT-0utWBiuqB zbLymx`VwA1S(!}@=7P=Hb7&zjps5Jn0vZl4wN`DQ;6h@&M0+zLN-rN}OS$I^vFP1! zpay%F^9ipY?D{kRWCfg-AW3k5X5>9K_cu+mMVJuzo69Dm#96H3TZDLWnKb?2Z^stA z_Y%9H%O;5q23kQjjp>y~L;<0a)i)gAFGjKHl{|x|nr?v#uAdKTt zcUvHyJOW=T8}KZv`IJd2m9wRN%Z+fl@%=h~iwJiz`l9q>lYWg}E@n&6^Y0Vn$C^xf zJ0dL=OIg;PF$9YOYAGEI6Hh!ghua(Qq{f6B#|o)fyaeeWv>JSqwG0ece5*?$HO&^nM~Er+uxvk0neQd8WPJB(<|}DGh0WGD9{rigK8ze0C7tb}8e4NvH$M6A z$~p>_U#DHQ={!tq+qAms;lu_j>5#iuu!D32>2*kgl|Yo1rMw=DpE0iDrr zfn`P0MPbpxygRNPI|$<^q^T}EO9|GgTs{#Z=<8n>8RtF8v%luTU>|Vd?m(%&kW3Fw5 z^n6!#*EU*?&2tP?^we*59vCLqU3@DH8_|=`UQZ6A}z6Uolta;YatsG?8DC>rc21# zY5dTV(I?68bh5fZkrEbCq&~I?vskNwYYV2ElmrC8Ev+Ri;61Dq7@PO}`DLJKgawGE z9vZ#G=;UtZO&$mRT8l%De6lOj3boe3%bKfQ@=K@}+?$%qYm{8`zPVq4TaI1PXO# zA|3FFOm6WpuiL5R(}rZjEd6_9tEzC7L&D%H?_=naFWxrEg6NOZ5n7C z@lEF1KmQBJ+87Po&d+wxOa-3?GPrADsqz+lPgLnh*)KuX7eZKLVyoQMrO zE+_&TtB!TZ1w3E9rz1^Tk8#uR(!+KeDyT*5vsH+?K>fvspJ(E*UrM*G5W;VAzIYf$ zTzoj4(zDxEZs3|Bh?x_!$R2XNWpfv$^L|y?E_x34!gy3V*Ywv43Yq;FOR|{FO z#<-No=_$NjZjCH0Z*XIt3^IFw8kSGhQlyl6h|HR^)^kDTkL)5wa+Q!5S?TWe42ED4 zII=Os;`^a0k!@ z;AE+_I2q^-C-h8>5QQm9+``VaM0qvAS21sw;02FSMS7XkGvwA)c2o>50p_C!wKhh7 z%~l;sX8zUSc1Lj--P=Dm+1=wA^Q?{P&VMB#!Xgf-uk_=&qA8-MO9NyHy&{{%ORvdj z#ZjRSuj!}QR#pR-8({gV+&4r|jahw$taJ`$R2P#1f4nt$Z95tCa@(YN3Dq;6Pk<}w zFppji$otB+<3g`(O}QgPZ+Sf&g%yg1rgA~1kefg9PK4vh9PMRXD`M_ivC_S>%38ai zSHv?DIj#~p5=8}-8vJTFYJVi#UDS62c(oHYz=B7tEp#DvZ~@MoMP53>>nW+n%V+{@ z$-1N!)V(^vlB+CW+oi1ZKE4cIu(BuLr)+>xG=bpn;W1F`6K%FE{GIr68=~slM83mI z6fIX}qg-e#9J4ubfASFakHijABdw(G`^03%B*S^1CJ`7$*}-Xz#p@iTe4 None: + assert len(USDA_TEXTURE_CLASSES) == 12 + # Names in the official USDA Soil Survey Manual order. + assert set(USDA_TEXTURE_CLASSES) == { + "sand", "loamy_sand", "sandy_loam", + "loam", "silt_loam", "silt", + "sandy_clay_loam", "clay_loam", "silty_clay_loam", + "sandy_clay", "silty_clay", "clay", + } + + def test_every_texture_class_has_an_hsg(self) -> None: + # NEH 630 Ch7 §630.0701 prose -> a textbook HSG for each class. + assert set(USDA_TEXTURE_TO_HSG.keys()) == set(USDA_TEXTURE_CLASSES) + + def test_each_hsg_appears_at_least_once(self) -> None: + # All four NEH groups must be reachable from at least one texture. + assert set(USDA_TEXTURE_TO_HSG.values()) == set(HSG_CLASSES) + + def test_clay_textures_are_d(self) -> None: + # The three clayey classes always map to D per §630.0701. + for cls in ("sandy_clay", "silty_clay", "clay"): + assert USDA_TEXTURE_TO_HSG[cls] == "D" + + def test_sand_and_loamy_sand_are_a(self) -> None: + # The most permeable classes always map to A. + assert USDA_TEXTURE_TO_HSG["sand"] == "A" + assert USDA_TEXTURE_TO_HSG["loamy_sand"] == "A" + + +class TestHsgEncoding: + def test_letter_to_int_round_trip(self) -> None: + for letter in HSG_CLASSES: + assert INT_TO_HSG[HSG_TO_INT[letter]] == letter + + def test_zero_is_reserved_for_nodata(self) -> None: + # 0 must not be assigned to any letter so categorical rasters can + # use it as a nodata sentinel. + assert 0 not in HSG_TO_INT.values() + + def test_int_codes_are_one_through_four(self) -> None: + assert sorted(HSG_TO_INT.values()) == [1, 2, 3, 4] diff --git a/tests/soil/test_fixture_regression.py b/tests/soil/test_fixture_regression.py new file mode 100644 index 0000000..66f3300 --- /dev/null +++ b/tests/soil/test_fixture_regression.py @@ -0,0 +1,44 @@ +"""Verify the committed SoilGrids fixture still matches a live ISRIC fetch. + +Mirrors the other module fixture regression tests: the single network-bound +contract guarding the offline soil tests. If ISRIC reprocesses SoilGrids +2.0 and the fixture drifts, this test fails first. +""" + +import numpy as np +import pytest + +from floodpath.soil import TextureGrid, get_soilgrids_texture +from tests.fixtures._generate_robit_bata_soil import ( + FIXTURE_BUFFER_DEG, + FIXTURE_DEPTHS, + FIXTURE_LAT, + FIXTURE_LON, +) + + +@pytest.mark.integration +class TestFixtureRegression: + def test_live_fetch_matches_committed_fixture( + self, + robit_bata_soil: TextureGrid, + ) -> None: + live = get_soilgrids_texture( + lat=FIXTURE_LAT, + lon=FIXTURE_LON, + buffer_deg=FIXTURE_BUFFER_DEG, + depths=FIXTURE_DEPTHS, + ) + assert live.shape == robit_bata_soil.shape + assert live.depths == robit_bata_soil.depths + # SoilGrids predictions are deterministic for a fixed product + # version, so values should round-trip exactly within float32. + np.testing.assert_allclose( + live.sand, robit_bata_soil.sand, rtol=0, atol=1e-3, equal_nan=True + ) + np.testing.assert_allclose( + live.silt, robit_bata_soil.silt, rtol=0, atol=1e-3, equal_nan=True + ) + np.testing.assert_allclose( + live.clay, robit_bata_soil.clay, rtol=0, atol=1e-3, equal_nan=True + ) diff --git a/tests/soil/test_hsg.py b/tests/soil/test_hsg.py new file mode 100644 index 0000000..20611d1 --- /dev/null +++ b/tests/soil/test_hsg.py @@ -0,0 +1,102 @@ +"""Unit tests for soil.hsg (texture_to_hsg).""" + +import numpy as np +import pytest +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox +from floodpath.soil import HSG_TO_INT, TextureGrid, texture_to_hsg + + +def _toy_texture(sand: list[list[float]], + silt: list[list[float]], + clay: list[list[float]]) -> TextureGrid: + sand_a = np.array(sand, dtype=np.float32) + silt_a = np.array(silt, dtype=np.float32) + clay_a = np.array(clay, dtype=np.float32) + return TextureGrid( + sand=sand_a, + silt=silt_a, + clay=clay_a, + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * sand_a.shape[1], 0.001 * sand_a.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +class TestTextureToHsg: + def test_canonical_quartet(self) -> None: + # Each cell = a textbook centroid for one HSG class. + # (sand, silt, clay) -> texture class -> HSG + # (92, 5, 3) -> sand -> A (1) + # (40, 40, 20) -> loam -> B (2) + # (30, 35, 35) -> clay_loam -> C (3) + # (10, 45, 45) -> silty_clay -> D (4) + tex = _toy_texture( + sand=[[92, 40], [30, 10]], + silt=[[ 5, 40], [35, 45]], + clay=[[ 3, 20], [35, 45]], + ) + hsg = texture_to_hsg(tex) + np.testing.assert_array_equal( + hsg.values, + np.array([[1, 2], [3, 4]], dtype=np.uint8), + ) + + def test_metadata_propagated(self) -> None: + tex = _toy_texture( + sand=[[40]], silt=[[40]], clay=[[20]], + ) + hsg = texture_to_hsg(tex) + assert hsg.transform == tex.transform + assert hsg.crs == tex.crs + assert hsg.bbox == tex.bbox + assert hsg.depths == tex.depths + + def test_dtype_is_uint8(self) -> None: + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + hsg = texture_to_hsg(tex) + assert hsg.values.dtype == np.uint8 + + def test_custom_mapping_overrides_defaults(self) -> None: + # Force every loam cell to D (used to model "compacted loam" sites). + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + hsg = texture_to_hsg(tex, mapping={"loam": "D"}) + # All other texture classes will fall to nodata since they're not in + # the override mapping; that's the expected behaviour for a strict + # custom table. + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + def test_invalid_hsg_letter_raises(self) -> None: + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + with pytest.raises(ValueError, match="not a valid HSG letter"): + texture_to_hsg(tex, mapping={"loam": "X"}) + + +class TestPropertyClayDominanceImpliesD: + """Any cell with >=45% clay AND silt-dominant texture lands in D.""" + + def test_silty_clay_is_d(self) -> None: + tex = _toy_texture(sand=[[10]], silt=[[45]], clay=[[45]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + def test_clay_is_d(self) -> None: + tex = _toy_texture(sand=[[10]], silt=[[20]], clay=[[70]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + +class TestPropertySandDominanceImpliesA: + """Any cell that classifies to `sand` or `loamy_sand` lands in A.""" + + def test_pure_sand_is_a(self) -> None: + tex = _toy_texture(sand=[[100]], silt=[[0]], clay=[[0]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["A"] + + def test_loamy_sand_is_a(self) -> None: + tex = _toy_texture(sand=[[82]], silt=[[12]], clay=[[6]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["A"] diff --git a/tests/soil/test_models.py b/tests/soil/test_models.py new file mode 100644 index 0000000..9f5866a --- /dev/null +++ b/tests/soil/test_models.py @@ -0,0 +1,91 @@ +"""Unit tests for soil dataclasses (TextureGrid, HSGGrid).""" + +import numpy as np +import pytest +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox +from floodpath.soil import HSGGrid, TextureGrid + + +def _toy_texture(sand: np.ndarray, silt: np.ndarray, clay: np.ndarray) -> TextureGrid: + return TextureGrid( + sand=sand.astype(np.float32), + silt=silt.astype(np.float32), + clay=clay.astype(np.float32), + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * sand.shape[1], 0.001 * sand.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +def _toy_hsg(values: np.ndarray) -> HSGGrid: + return HSGGrid( + values=values.astype(np.uint8), + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * values.shape[1], 0.001 * values.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +class TestTextureGrid: + def test_shape_property(self) -> None: + tex = _toy_texture( + np.full((4, 4), 60.0), + np.full((4, 4), 25.0), + np.full((4, 4), 15.0), + ) + assert tex.shape == (4, 4) + + def test_closure_residual_zero_when_perfect(self) -> None: + tex = _toy_texture( + np.full((2, 2), 50.0), + np.full((2, 2), 30.0), + np.full((2, 2), 20.0), + ) + assert tex.closure_residual().max() == pytest.approx(0.0) + + def test_closure_residual_flags_imbalance(self) -> None: + # SoilGrids regressions can drift by ~1-2 pp; the residual surfaces it. + tex = _toy_texture( + np.full((2, 2), 50.0), + np.full((2, 2), 30.0), + np.full((2, 2), 22.0), # sum = 102 + ) + assert tex.closure_residual().max() == pytest.approx(2.0) + + +class TestHSGGrid: + def test_class_counts_skips_nodata(self) -> None: + # 1=A, 2=B, 3=C, 4=D, 0=nodata + hsg = _toy_hsg(np.array([[1, 2, 0], [3, 4, 1]])) + counts = hsg.class_counts() + assert counts == {"A": 2, "B": 1, "C": 1, "D": 1} + + def test_fraction(self) -> None: + hsg = _toy_hsg(np.array([[1, 1], [2, 3]])) + assert hsg.fraction("A") == pytest.approx(0.5) + assert hsg.fraction("B") == pytest.approx(0.25) + assert hsg.fraction("D") == 0.0 + + def test_dominant_class(self) -> None: + hsg = _toy_hsg(np.array([[3, 3, 3], [3, 4, 4]])) + assert hsg.dominant_class() == "C" + + def test_dominant_class_raises_on_empty(self) -> None: + hsg = _toy_hsg(np.zeros((2, 2))) # all nodata + with pytest.raises(ValueError, match="no classified cells"): + hsg.dominant_class() + + def test_as_letter_grid(self) -> None: + hsg = _toy_hsg(np.array([[1, 2], [3, 4]])) + letters = hsg.as_letter_grid() + np.testing.assert_array_equal(letters, np.array([["A", "B"], ["C", "D"]])) + + def test_as_letter_grid_nodata_is_empty(self) -> None: + hsg = _toy_hsg(np.array([[1, 0]])) + letters = hsg.as_letter_grid() + assert letters[0, 0] == "A" + assert letters[0, 1] == "" diff --git a/tests/soil/test_soilgrids.py b/tests/soil/test_soilgrids.py new file mode 100644 index 0000000..0e05ff5 --- /dev/null +++ b/tests/soil/test_soilgrids.py @@ -0,0 +1,119 @@ +"""Unit + integration tests for soil.soilgrids.""" + +import numpy as np +import pytest + +from floodpath.soil import ( + SOILGRIDS_DEFAULT_DEPTHS, + TextureGrid, + get_soilgrids_texture, + texture_to_hsg, +) +from floodpath.soil.soilgrids import soilgrids_vrt_url + + +class TestUrlBuilder: + def test_default_statistic_is_mean(self) -> None: + url = soilgrids_vrt_url("sand", "5-15cm") + assert url.endswith("sand_5-15cm_mean.vrt") + + def test_custom_statistic(self) -> None: + url = soilgrids_vrt_url("clay", "0-5cm", statistic="Q0.5") + assert url.endswith("clay_0-5cm_Q0.5.vrt") + + def test_invalid_depth_raises(self) -> None: + with pytest.raises(ValueError, match="depth must be one of"): + soilgrids_vrt_url("sand", "999cm") + + +class TestGetSoilgridsOffline: + def test_empty_depths_raises(self) -> None: + with pytest.raises(ValueError, match="at least one slice"): + get_soilgrids_texture(11.805, 37.5625, depths=()) + + def test_invalid_depth_raises(self) -> None: + with pytest.raises(ValueError, match="not in published SoilGrids"): + get_soilgrids_texture(11.805, 37.5625, depths=("999cm",)) + + +class TestRobitBataSoilFixture: + """Pinned values for the committed Robit Bata SoilGrids fixture.""" + + def test_shape_and_metadata(self, robit_bata_soil: TextureGrid) -> None: + # 250 m SoilGrids cells over a 0.075 deg patch -> ~33x33. + assert robit_bata_soil.shape == (33, 33) + assert robit_bata_soil.depths == ("0-5cm", "5-15cm", "15-30cm") + assert robit_bata_soil.crs == "EPSG:4326" + + def test_texture_means_pinned(self, robit_bata_soil: TextureGrid) -> None: + # Pinned: 0-30 cm topsoil composition for the Robit Bata bbox + # (Vertisol-rich Ethiopian highlands -> very clay-heavy). + assert np.nanmean(robit_bata_soil.sand) == pytest.approx(15.40, abs=0.05) + assert np.nanmean(robit_bata_soil.silt) == pytest.approx(30.85, abs=0.05) + assert np.nanmean(robit_bata_soil.clay) == pytest.approx(53.75, abs=0.05) + + def test_clay_dominates(self, robit_bata_soil: TextureGrid) -> None: + # The patch sits in a Vertisol belt — clay must dominate the + # texture composition. + assert np.nanmean(robit_bata_soil.clay) > np.nanmean(robit_bata_soil.silt) + assert np.nanmean(robit_bata_soil.clay) > np.nanmean(robit_bata_soil.sand) + + def test_closure_within_tolerance(self, robit_bata_soil: TextureGrid) -> None: + # SoilGrids regressions can drift; max closure residual stays small. + residual = robit_bata_soil.closure_residual() + assert np.nanmax(residual) < 1.0 + + def test_valid_cell_count_pinned(self, robit_bata_soil: TextureGrid) -> None: + # Pinned: 1088/1089 cells valid (one edge cell is nodata after the + # Goode -> WGS84 reprojection corner). + valid = (~np.isnan(robit_bata_soil.sand)).sum() + assert int(valid) == 1088 + + def test_hsg_is_uniformly_d(self, robit_bata_soil: TextureGrid) -> None: + # With ~54% mean clay, every classified cell falls in HSG D + # (clayey textures per NEH 630 Ch7 §630.0701). + hsg = texture_to_hsg(robit_bata_soil) + counts = hsg.class_counts() + assert set(counts.keys()) == {"D"} + assert counts["D"] == 1088 + + +@pytest.mark.integration +class TestGetSoilgridsIntegration: + """Live ISRIC fetch on a small bbox. + + Uses a tiny patch over Djibouti to keep the integration footprint small + while exercising the full /vsicurl/ + WarpedVRT + parallel fetch path. + """ + + def test_djibouti_default_depths_returns_texture(self) -> None: + tex = get_soilgrids_texture( + lat=11.59, + lon=43.15, + buffer_deg=0.05, + depths=SOILGRIDS_DEFAULT_DEPTHS, + ) + assert tex.depths == SOILGRIDS_DEFAULT_DEPTHS + assert tex.crs == "EPSG:4326" + # Some cells must have valid texture predictions. + assert (~np.isnan(tex.sand)).any() + # Closure: where data exists, sand+silt+clay should sum near 100. + valid = ( + ~np.isnan(tex.sand) + & ~np.isnan(tex.silt) + & ~np.isnan(tex.clay) + ) + residual = np.abs( + tex.sand[valid] + tex.silt[valid] + tex.clay[valid] - 100.0 + ) + assert residual.max() < 5.0 # SoilGrids regressions drift up to a few % + + def test_single_depth_works(self) -> None: + tex = get_soilgrids_texture( + lat=11.59, + lon=43.15, + buffer_deg=0.05, + depths=("5-15cm",), + ) + assert tex.depths == ("5-15cm",) + assert tex.shape[0] > 0 and tex.shape[1] > 0 diff --git a/tests/soil/test_utils.py b/tests/soil/test_utils.py new file mode 100644 index 0000000..31e90b4 --- /dev/null +++ b/tests/soil/test_utils.py @@ -0,0 +1,98 @@ +"""Unit tests for the USDA texture-triangle classifier.""" + +import numpy as np +import pytest + +from floodpath.soil import ( + USDA_TEXTURE_CLASSES, + USDA_TEXTURE_TO_HSG, + usda_texture_class, +) + + +# Textbook centroid (sand, silt, clay) for each of the 12 USDA classes, +# from the NRCS online texture calculator. These are well inside their +# respective polygons in the texture triangle and should round-trip exactly. +CANONICAL_CENTROIDS: tuple[tuple[str, int, int, int], ...] = ( + ("sand", 92, 5, 3), + ("loamy_sand", 82, 12, 6), + ("sandy_loam", 65, 25, 10), + ("loam", 40, 40, 20), + ("silt_loam", 20, 65, 15), + ("silt", 10, 85, 5), + ("sandy_clay_loam", 60, 10, 30), + ("clay_loam", 30, 35, 35), + ("silty_clay_loam", 10, 55, 35), + ("sandy_clay", 50, 5, 45), + ("silty_clay", 10, 45, 45), + ("clay", 20, 20, 60), +) + + +class TestScalarCentroids: + @pytest.mark.parametrize("expected,sand,silt,clay", CANONICAL_CENTROIDS) + def test_each_centroid_classifies_correctly( + self, + expected: str, + sand: int, + silt: int, + clay: int, + ) -> None: + assert usda_texture_class(sand, silt, clay) == expected + + +class TestVectorisedClassifier: + def test_array_input_returns_array(self) -> None: + sand = np.array([s for _, s, _, _ in CANONICAL_CENTROIDS]) + silt = np.array([si for _, _, si, _ in CANONICAL_CENTROIDS]) + clay = np.array([c for _, _, _, c in CANONICAL_CENTROIDS]) + out = usda_texture_class(sand, silt, clay) + assert out.shape == sand.shape + for cls, _, _, _ in CANONICAL_CENTROIDS: + assert cls in out + + def test_2d_grid(self) -> None: + # 2x2 grid: pure sand, loam, clay, silty clay. + sand = np.array([[100, 40], [ 20, 10]], dtype=float) + silt = np.array([[ 0, 40], [ 20, 45]], dtype=float) + clay = np.array([[ 0, 20], [ 60, 45]], dtype=float) + out = usda_texture_class(sand, silt, clay) + assert out.shape == (2, 2) + assert out[0, 0] == "sand" + assert out[0, 1] == "loam" + assert out[1, 0] == "clay" + assert out[1, 1] == "silty_clay" + + +class TestEdgeCases: + def test_pure_sand(self) -> None: + assert usda_texture_class(100, 0, 0) == "sand" + + def test_pure_clay(self) -> None: + assert usda_texture_class(0, 0, 100) == "clay" + + def test_pure_silt(self) -> None: + # 100% silt: silt >= 80 AND clay < 12 -> "silt". + assert usda_texture_class(0, 100, 0) == "silt" + + def test_unclassifiable_falls_back_to_loam(self) -> None: + # An (off-triangle) input with all rules unmet falls through to loam. + # E.g. a clearly-impossible combo of moderate sand + huge silt. + # Pick (40, 35, 5) — barely outside loam (silt < 28) and outside + # silt loam (silt < 50) and outside sandy loam rule (sand < 43). + cls = usda_texture_class(40, 35, 5) + # The point should map to *some* defined class — fallback ensures + # we never return None or a non-USDA name. + assert cls in USDA_TEXTURE_CLASSES + + +class TestEveryClassIsReachable: + """Smoke check: each USDA class can be produced by some plausible input.""" + + def test_every_class_appears_in_centroid_set(self) -> None: + seen = {expected for expected, *_ in CANONICAL_CENTROIDS} + assert seen == set(USDA_TEXTURE_CLASSES) + + def test_centroid_hsgs_span_all_four_groups(self) -> None: + hsgs = {USDA_TEXTURE_TO_HSG[expected] for expected, *_ in CANONICAL_CENTROIDS} + assert hsgs == {"A", "B", "C", "D"}