diff --git a/README.md b/README.md index c57a883..e3fe758 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,8 @@ print(f"Total damaged built-up: {damage.values.sum():,.0f} m²") | `floodpath.dem` | Copernicus GLO-30 (AWS Open Data, COG) | Elevation patch around any (lat, lon) | | `floodpath.hydrology` | derived from DEM via `pyflwdir` | Flow direction + accumulation, stream networks (with Strahler order), basin delineation, HAND | | `floodpath.exposure` | GHSL R2023A, WorldPop, OpenStreetMap (Overpass) | Built-up surface, population, building footprints | -| `floodpath.landuse` | ESA WorldCover (10 m, AWS Open Data, COG) | 11-class land-cover raster (2020 v100, 2021 v200) | +| `floodpath.landuse` | ESA WorldCover (10 m, AWS Open Data, COG) | 11-class land-cover raster (2020 v100, 2021 v200), Manning's roughness derivation | +| `floodpath.soil` | ISRIC SoilGrids 2.0 (250 m, COG) | Sand/silt/clay topsoil composition + USDA texture-triangle classification + NEH 630 Ch7 hydrologic soil group (A/B/C/D) | | `floodpath.damage` | JRC Huizinga 2017 + DEM/HAND/GHSL | Per-cell flood depth and damage in m² of built-up surface | ## Depth-damage curves diff --git a/floodpath/soil/__init__.py b/floodpath/soil/__init__.py new file mode 100644 index 0000000..ddb135a --- /dev/null +++ b/floodpath/soil/__init__.py @@ -0,0 +1,32 @@ +"""Soil module: hydrologic soil properties from SoilGrids 2.0. + +Provides the SOIL half of the SCS-CN runoff parameterisation. Pair with +`floodpath.landuse` to derive a Curve Number raster. +""" + +from .constants import ( + HSG_CLASSES, + HSG_TO_INT, + INT_TO_HSG, + SOILGRIDS_DEFAULT_DEPTHS, + USDA_TEXTURE_CLASSES, + USDA_TEXTURE_TO_HSG, +) +from .hsg import texture_to_hsg +from .models import HSGGrid, TextureGrid +from .soilgrids import get_soilgrids_texture +from .utils import usda_texture_class + +__all__ = [ + "HSGGrid", + "HSG_CLASSES", + "HSG_TO_INT", + "INT_TO_HSG", + "SOILGRIDS_DEFAULT_DEPTHS", + "TextureGrid", + "USDA_TEXTURE_CLASSES", + "USDA_TEXTURE_TO_HSG", + "get_soilgrids_texture", + "texture_to_hsg", + "usda_texture_class", +] diff --git a/floodpath/soil/constants.py b/floodpath/soil/constants.py new file mode 100644 index 0000000..7b097af --- /dev/null +++ b/floodpath/soil/constants.py @@ -0,0 +1,122 @@ +"""Constants for the soil module.""" + +from types import MappingProxyType + +# --- ISRIC SoilGrids 2.0 (250 m global, COG via WebDAV) -------------------- +# +# VRT files reference the underlying COG tiles in Goode Homolosine; GDAL's +# /vsicurl/ + a WarpedVRT projects them to WGS84 on the fly. The bucket +# honours HTTP range requests, so a window read is cheap. + +SOILGRIDS_BASE_URL: str = "https://files.isric.org/soilgrids/latest/data" +SOILGRIDS_NATIVE_CRS: str = ( + 'PROJCRS["unknown",BASEGEOGCRS["unknown",DATUM["unknown",' + 'ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],' + 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]]],' + 'CONVERSION["unknown",METHOD["Interrupted Goode Homolosine"],' + 'PARAMETER["Longitude of natural origin",0,ANGLEUNIT["degree",0.0174532925199433]],' + 'PARAMETER["False easting",0,LENGTHUNIT["metre",1]],' + 'PARAMETER["False northing",0,LENGTHUNIT["metre",1]]],' + 'CS[Cartesian,2],AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1]],' + 'AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1]]]' +) +SOILGRIDS_TARGET_CRS: str = "EPSG:4326" + +# SoilGrids stores sand/silt/clay as integer g/kg (0-1000). Multiply by 0.1 +# to get percent by mass (0-100). +SOILGRIDS_TEXTURE_SCALE: float = 0.1 + +# SoilGrids nodata sentinel (int16). Cells with this value have no +# prediction (oceans, ice, glaciers, no-cover land). Must be masked +# *before* applying the scale factor or it pollutes downstream stats. +SOILGRIDS_NODATA: int = -32768 + +# Standard depth slices (in cm) published by SoilGrids 2.0. +SOILGRIDS_DEPTHS: tuple[str, ...] = ( + "0-5cm", + "5-15cm", + "15-30cm", + "30-60cm", + "60-100cm", + "100-200cm", +) +# Default depth-weighted topsoil window (0-30 cm). Closest available proxy +# for the 0-50 cm reference depth used in NEH 630 Ch7 Table 7-1. +SOILGRIDS_DEFAULT_DEPTHS: tuple[str, ...] = ("0-5cm", "5-15cm", "15-30cm") +# Width in cm of each depth slice — used as weights when averaging. +SOILGRIDS_DEPTH_WIDTHS_CM: MappingProxyType = MappingProxyType({ + "0-5cm": 5, + "5-15cm": 10, + "15-30cm": 15, + "30-60cm": 30, + "60-100cm": 40, + "100-200cm": 100, +}) + +SOILGRIDS_BUFFER_DEG: float = 0.1 +SOILGRIDS_TEXTURE_UNITS: str = "% by mass" + + +# --- USDA Soil Texture Triangle (12 classes) ------------------------------- +# +# Sand / silt / clay in % by mass. Boundaries follow the Soil Survey +# Manual (1993) and Soil Taxonomy 2nd ed. (1999). + +USDA_TEXTURE_CLASSES: tuple[str, ...] = ( + "sand", + "loamy_sand", + "sandy_loam", + "loam", + "silt_loam", + "silt", + "sandy_clay_loam", + "clay_loam", + "silty_clay_loam", + "sandy_clay", + "silty_clay", + "clay", +) + + +# --- USDA Texture class -> NEH 630 Ch7 Hydrologic Soil Group --------------- +# +# Source: NRCS National Engineering Handbook Part 630 Chapter 7 +# (Hydrologic Soil Groups, January 2009), §630.0701 textural criteria +# in conjunction with §630.0701 prose ("typical" assignment for soils +# with no shallow impermeable layer or high water table). +# +# §630.0701 group definitions: +# A <10% clay AND >90% sand or gravel; gravel/sand textures. +# B 10-20% clay AND 50-90% sand; loamy sand or sandy loam textures. +# C 20-40% clay AND <50% sand; loam, silt loam, sandy clay loam, +# clay loam, silty clay loam. +# D >40% clay AND <50% sand; clayey textures (clay, silty clay, +# sandy clay). +# +# A handful of texture classes straddle the prose ranges; we follow the +# most-cited consensus mapping used in SWAT, HEC-HMS, and the Kalyanapu / +# Yu / et-al. global SCS-CN literature. + +USDA_TEXTURE_TO_HSG: MappingProxyType = MappingProxyType({ + "sand": "A", + "loamy_sand": "A", + "sandy_loam": "B", + "loam": "B", + "silt_loam": "B", + "silt": "B", + "sandy_clay_loam": "C", + "clay_loam": "C", + "silty_clay_loam": "C", + "sandy_clay": "D", + "silty_clay": "D", + "clay": "D", +}) + +HSG_CLASSES: tuple[str, ...] = ("A", "B", "C", "D") + +# Categorical encoding for raster storage. 0 = nodata. +HSG_TO_INT: MappingProxyType = MappingProxyType({"A": 1, "B": 2, "C": 3, "D": 4}) +INT_TO_HSG: MappingProxyType = MappingProxyType({v: k for k, v in HSG_TO_INT.items()}) + +HSG_NODATA: int = 0 +HSG_UNITS: str = "NEH 630 Ch7 hydrologic soil group (1=A, 2=B, 3=C, 4=D)" diff --git a/floodpath/soil/hsg.py b/floodpath/soil/hsg.py new file mode 100644 index 0000000..7bde351 --- /dev/null +++ b/floodpath/soil/hsg.py @@ -0,0 +1,72 @@ +"""Map a TextureGrid to a Hydrologic Soil Group (HSG) raster. + +Per USDA NRCS NEH Part 630 Chapter 7 §630.0701 (January 2009): each cell's +USDA texture class is looked up in `USDA_TEXTURE_TO_HSG` to assign A/B/C/D. +This is the textural shortcut for the full Table 7-1 criteria (which would +require Ksat, depth-to-impermeable-layer, and water-table data we do not +have globally). + +The shortcut assumes "deep" soils with no shallow restrictive layer and +no high water table — appropriate for global SCS-CN runoff modelling at +~250 m. Users with site-calibrated Ksat data should compute HSG directly +from Table 7-1. +""" + +import numpy as np + +from .constants import ( + HSG_NODATA, + HSG_TO_INT, + USDA_TEXTURE_TO_HSG, +) +from .models import HSGGrid, TextureGrid +from .utils import usda_texture_class + + +def texture_to_hsg( + texture: TextureGrid, + mapping: dict[str, str] | None = None, +) -> HSGGrid: + """Convert a soil texture grid to a hydrologic soil group raster. + + Args: + texture: Per-cell sand/silt/clay percentages (TextureGrid). + mapping: Optional override of the USDA-class -> HSG-letter table. + Defaults to USDA_TEXTURE_TO_HSG (NEH 630 Ch7 §630.0701). + + Returns: + HSGGrid with int-encoded HSG classes (1=A, 2=B, 3=C, 4=D, 0=nodata). + """ + table = dict(USDA_TEXTURE_TO_HSG) if mapping is None else dict(mapping) + + # Cells with NaN in any texture component (SoilGrids nodata) cannot be + # classified — keep them at HSG_NODATA. Pass safe placeholders to the + # classifier and mask the output afterwards. + sand = np.where(np.isnan(texture.sand), 0.0, texture.sand) + silt = np.where(np.isnan(texture.silt), 0.0, texture.silt) + clay = np.where(np.isnan(texture.clay), 0.0, texture.clay) + nodata_mask = ( + np.isnan(texture.sand) + | np.isnan(texture.silt) + | np.isnan(texture.clay) + ) + + classes = usda_texture_class(sand, silt, clay) + # `classes` is an object array of texture-class names; map each unique + # class to its HSG int and apply via fancy indexing. + hsg_int = np.full(classes.shape, HSG_NODATA, dtype=np.uint8) + for cls_name, letter in table.items(): + if letter not in HSG_TO_INT: + raise ValueError( + f"mapping value {letter!r} is not a valid HSG letter" + ) + hsg_int[classes == cls_name] = HSG_TO_INT[letter] + hsg_int[nodata_mask] = HSG_NODATA + + return HSGGrid( + values=hsg_int, + transform=texture.transform, + crs=texture.crs, + bbox=texture.bbox, + depths=texture.depths, + ) diff --git a/floodpath/soil/models.py b/floodpath/soil/models.py new file mode 100644 index 0000000..2e2125e --- /dev/null +++ b/floodpath/soil/models.py @@ -0,0 +1,100 @@ +"""Dataclasses for the soil module.""" + +from dataclasses import dataclass + +import numpy as np +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox + +from .constants import ( + HSG_CLASSES, + HSG_TO_INT, + INT_TO_HSG, + SOILGRIDS_TEXTURE_UNITS, +) + + +@dataclass +class TextureGrid: + """Per-cell soil texture composition (sand / silt / clay) as percentages. + + Each of `sand`, `silt`, `clay` is a float32 array in `% by mass`. They + should sum to ~100 at every cell (SoilGrids predictions can be off by + 1-2 due to independent regression — the texture-triangle classifier is + robust to that). + + `depths` records which SoilGrids depth slice(s) the values represent; + when more than one was averaged, the list reflects the depth-weighted + composition. + """ + + sand: np.ndarray + silt: np.ndarray + clay: np.ndarray + transform: Affine + crs: str + bbox: BoundingBox + depths: tuple[str, ...] + units: str = SOILGRIDS_TEXTURE_UNITS + + @property + def shape(self) -> tuple[int, int]: + """Return the (rows, cols) shape of the underlying rasters.""" + return self.sand.shape # type: ignore[return-value] + + def closure_residual(self) -> np.ndarray: + """Return per-cell |sand+silt+clay - 100|, useful for diagnostics.""" + return np.abs(self.sand + self.silt + self.clay - 100.0) + + +@dataclass +class HSGGrid: + """Per-cell NEH 630 Ch7 hydrologic soil group (A/B/C/D). + + Stored as an integer raster with the encoding in HSG_TO_INT (1=A, 2=B, + 3=C, 4=D, 0=nodata). The `epoch`-equivalent for soil is depth — recorded + in `depths` so consumers can introspect the assumption. + """ + + values: np.ndarray + transform: Affine + crs: str + bbox: BoundingBox + depths: tuple[str, ...] + units: str = "NEH 630 Ch7 HSG (1=A, 2=B, 3=C, 4=D)" + + @property + def shape(self) -> tuple[int, int]: + """Return the (rows, cols) shape of the underlying raster.""" + return self.values.shape # type: ignore[return-value] + + def class_counts(self) -> dict[str, int]: + """Count cells per HSG letter present in the grid (excludes nodata).""" + out: dict[str, int] = {} + for letter in HSG_CLASSES: + n = int(np.count_nonzero(self.values == HSG_TO_INT[letter])) + if n > 0: + out[letter] = n + return out + + def fraction(self, letter: str) -> float: + """Return the fraction of cells in the given HSG letter.""" + if self.values.size == 0: + return 0.0 + n = int(np.count_nonzero(self.values == HSG_TO_INT[letter])) + return float(n / self.values.size) + + def dominant_class(self) -> str: + """Return the HSG letter with the most cells; raises if grid is empty.""" + counts = self.class_counts() + if not counts: + raise ValueError("HSGGrid contains no classified cells") + return max(counts, key=counts.get) # type: ignore[arg-type] + + def as_letter_grid(self) -> np.ndarray: + """Return a US1-string array with HSG letters; nodata becomes empty string.""" + out = np.empty(self.values.shape, dtype="U1") + for code, letter in INT_TO_HSG.items(): + out[self.values == code] = letter + return out diff --git a/floodpath/soil/soilgrids.py b/floodpath/soil/soilgrids.py new file mode 100644 index 0000000..2beb2d6 --- /dev/null +++ b/floodpath/soil/soilgrids.py @@ -0,0 +1,198 @@ +"""Fetch ISRIC SoilGrids 2.0 soil-property patches via /vsicurl/.""" + +from concurrent.futures import ThreadPoolExecutor + +import numpy as np +import rasterio +from rasterio.enums import Resampling +from rasterio.env import Env +from rasterio.vrt import WarpedVRT +from rasterio.windows import from_bounds +from rasterio.windows import transform as window_transform + +from floodpath.dem.constants import GDAL_VSICURL_ENV +from floodpath.dem.models import BoundingBox +from floodpath.dem.utils import bbox_from_point + +from .constants import ( + SOILGRIDS_BASE_URL, + SOILGRIDS_BUFFER_DEG, + SOILGRIDS_DEFAULT_DEPTHS, + SOILGRIDS_DEPTH_WIDTHS_CM, + SOILGRIDS_DEPTHS, + SOILGRIDS_NODATA, + SOILGRIDS_TARGET_CRS, + SOILGRIDS_TEXTURE_SCALE, +) +from .models import TextureGrid + + +def soilgrids_vrt_url( + variable: str, + depth: str, + statistic: str = "mean", +) -> str: + """Build the public SoilGrids 2.0 VRT URL. + + Args: + variable: One of `sand`, `silt`, `clay`, `bdod`, `cec`, ... + depth: One of SOILGRIDS_DEPTHS, e.g. `5-15cm`. + statistic: Defaults to `mean`; also `Q0.05`, `Q0.5`, `Q0.95`, + `uncertainty`. + + Raises: + ValueError: `depth` is not one of the published slices. + """ + if depth not in SOILGRIDS_DEPTHS: + raise ValueError( + f"depth must be one of {SOILGRIDS_DEPTHS}, got {depth!r}" + ) + return ( + f"{SOILGRIDS_BASE_URL}/{variable}/" + f"{variable}_{depth}_{statistic}.vrt" + ) + + +def _read_warped_window( + url: str, + bbox: BoundingBox, +) -> tuple[np.ndarray, "rasterio.transform.Affine"]: + """Open a SoilGrids VRT, warp to WGS84, return the bbox window + transform. + + SoilGrids native CRS is Interrupted Goode Homolosine. We let GDAL + decide the destination resolution from the source pixel size projected + to WGS84 — the ~250 m native pixels become roughly 0.0022° at the + equator, so a 0.075° patch resolves to ~33 cells. + + Returned data is the raw int16 raster including the SoilGrids nodata + sentinel (-32768); callers are responsible for masking it. + """ + with rasterio.open(f"/vsicurl/{url}") as src: + with WarpedVRT( + src, + crs=SOILGRIDS_TARGET_CRS, + resampling=Resampling.nearest, + ) as vrt: + win = from_bounds(*bbox.bounds, transform=vrt.transform) + # WarpedVRT does not allow boundless reads; SoilGrids is global + # so any sane bbox is fully inside the source extent. + data = vrt.read(1, window=win) + clipped_transform = window_transform(win, vrt.transform) + return data, clipped_transform + + +def _scale_texture(raw: np.ndarray) -> np.ndarray: + """Apply the SoilGrids texture scale, masking nodata to NaN.""" + out = raw.astype(np.float32) * SOILGRIDS_TEXTURE_SCALE + out[raw == SOILGRIDS_NODATA] = np.nan + return out + + +def _fetch_var_at_depth( + bbox: BoundingBox, + variable: str, + depth: str, +) -> tuple[str, str, np.ndarray, "rasterio.transform.Affine"]: + """Fetch a single (variable, depth) combination. Returns scaled %.""" + url = soilgrids_vrt_url(variable, depth) + raw, transform = _read_warped_window(url, bbox) + return variable, depth, _scale_texture(raw), transform + + +def get_soilgrids_texture( + lat: float, + lon: float, + buffer_deg: float = SOILGRIDS_BUFFER_DEG, + depths: tuple[str, ...] = SOILGRIDS_DEFAULT_DEPTHS, +) -> TextureGrid: + """Return depth-weighted soil texture (sand/silt/clay %) around a point. + + The fetcher uses GDAL's `/vsicurl/` driver against the public ISRIC + bucket (`files.isric.org/soilgrids/latest/data/`) and a `WarpedVRT` + to reproject from SoilGrids' native Goode Homolosine to WGS84. + Range-read so payload is on the order of a few hundred KB per call. + + For each depth slice in `depths`, three rasters are fetched + (sand / silt / clay) — all in parallel via a thread pool, since these + are I/O-bound. They are then averaged with weights equal to each depth's + thickness in centimetres so the result represents the bulk topsoil + composition over the chosen interval. Nodata cells (oceans, glaciers) + are masked to NaN. + + Args: + lat: Latitude of the region center, decimal degrees. + lon: Longitude of the region center, decimal degrees. + buffer_deg: Half-width of the square bbox around the point, degrees. + depths: Subset of SOILGRIDS_DEPTHS to average. Default is + (`0-5cm`, `5-15cm`, `15-30cm`) — the 0-30 cm topsoil window, + the closest available proxy for the 0-50 cm reference depth + used in NEH 630 Ch7 Table 7-1. + + Returns: + TextureGrid with sand/silt/clay arrays in % by mass, NaN where + SoilGrids has no prediction. + + Raises: + ValueError: `depths` is empty, or any element is not a published + SoilGrids slice. + """ + if not depths: + raise ValueError("depths must contain at least one slice") + for d in depths: + if d not in SOILGRIDS_DEPTHS: + raise ValueError( + f"depth {d!r} not in published SoilGrids depths " + f"{SOILGRIDS_DEPTHS}" + ) + + bbox = bbox_from_point(lat, lon, buffer_deg) + + # Fan out all (variable, depth) pairs to a thread pool. ISRIC tolerates + # ~10 concurrent connections per client; with 3-9 fetches per call we + # stay well below that. + jobs = [(var, depth) for depth in depths for var in ("sand", "silt", "clay")] + + with Env(**GDAL_VSICURL_ENV): + with ThreadPoolExecutor(max_workers=min(len(jobs), 9)) as pool: + futures = [ + pool.submit(_fetch_var_at_depth, bbox, var, depth) + for var, depth in jobs + ] + results = [f.result() for f in futures] + + # results is a list of (variable, depth, scaled, transform). + # Group by variable, depth-weighted average. + transform = results[0][3] + by_var: dict[str, dict[str, np.ndarray]] = {"sand": {}, "silt": {}, "clay": {}} + for var, depth, scaled, _ in results: + by_var[var][depth] = scaled + + weight_total = float(sum(SOILGRIDS_DEPTH_WIDTHS_CM[d] for d in depths)) + avg: dict[str, np.ndarray] = {} + for var, depth_to_arr in by_var.items(): + # Use np.nansum so masked nodata cells stay NaN only where every + # depth was nodata; partially-defined columns get a partial average. + weighted = np.zeros_like(next(iter(depth_to_arr.values()))) + weight_seen = np.zeros_like(weighted) + for d in depths: + arr = depth_to_arr[d] + w = float(SOILGRIDS_DEPTH_WIDTHS_CM[d]) + valid = ~np.isnan(arr) + weighted[valid] += w * arr[valid] + weight_seen[valid] += w + with np.errstate(invalid="ignore", divide="ignore"): + avg[var] = np.where( + weight_seen > 0, + weighted / weight_seen, + np.nan, + ) + + return TextureGrid( + sand=avg["sand"].astype(np.float32), + silt=avg["silt"].astype(np.float32), + clay=avg["clay"].astype(np.float32), + transform=transform, + crs=SOILGRIDS_TARGET_CRS, + bbox=BoundingBox(*bbox.bounds), + depths=tuple(depths), + ) diff --git a/floodpath/soil/utils.py b/floodpath/soil/utils.py new file mode 100644 index 0000000..16c0e7c --- /dev/null +++ b/floodpath/soil/utils.py @@ -0,0 +1,128 @@ +"""USDA Soil Texture Triangle classification (12 classes). + +The boundaries follow the USDA Soil Survey Manual (1993) and Soil Taxonomy +(1999). Each cell of input (sand%, silt%, clay%) maps to exactly one of +the 12 classes in `USDA_TEXTURE_CLASSES`. Inputs need not sum to exactly +100 — silt is computed implicitly so small (~1%) closure errors from +SoilGrids regressions don't affect the classification. + +Implementation note: the order of clauses matters because the prose +boundaries overlap at endpoints; the resolution below matches the +canonical online USDA Texture Calculator +(https://www.nrcs.usda.gov/resources/education-and-teaching-materials/soil-texture-calculator). +""" + +from __future__ import annotations + +import numpy as np + + +def _ensure_array(x: float | np.ndarray) -> np.ndarray: + """Convert scalar or array input to a float64 numpy array.""" + return np.asarray(x, dtype=np.float64) + + +def usda_texture_class( + sand: float | np.ndarray, + silt: float | np.ndarray, + clay: float | np.ndarray, +) -> str | np.ndarray: + """Classify (sand, silt, clay) percentages into a USDA texture class. + + Args: + sand: Sand fraction, % by mass. Scalar or array. + silt: Silt fraction, % by mass. Same shape as `sand`. + clay: Clay fraction, % by mass. Same shape as `sand`. + + Returns: + If scalar inputs: a string from USDA_TEXTURE_CLASSES. + If array inputs: an object array of strings of the same shape. + """ + s = _ensure_array(sand) + si = _ensure_array(silt) + c = _ensure_array(clay) + scalar = s.ndim == 0 + + # Promote scalars to 1-element arrays so the vectorised logic is uniform. + if scalar: + s = s.reshape(1) + si = si.reshape(1) + c = c.reshape(1) + + out = np.empty(s.shape, dtype=object) + out[:] = "loam" # default; should be overwritten by the rules below + + # Order of evaluation: most specific / clay-heavy first, sand-heavy last. + # Using cumulative `done` mask so later rules don't overwrite earlier + # assignments — mirrors the USDA Texture Calculator decision tree. + done = np.zeros(s.shape, dtype=bool) + + def _assign(name: str, mask: np.ndarray) -> None: + m = mask & ~done + out[m] = name + done[m] = True + + # 1. Clay (clay >= 40, sand <= 45, silt < 40). + _assign("clay", (c >= 40) & (s <= 45) & (si < 40)) + + # 2. Silty clay (silt >= 40, clay >= 40). + _assign("silty_clay", (si >= 40) & (c >= 40)) + + # 3. Sandy clay (sand >= 45, clay >= 35). + _assign("sandy_clay", (s >= 45) & (c >= 35)) + + # 4. Clay loam (clay 27-40, sand 20-45). + _assign("clay_loam", (c >= 27) & (c < 40) & (s > 20) & (s <= 45)) + + # 5. Silty clay loam (silt >= 40, clay 27-40). + _assign("silty_clay_loam", (si >= 40) & (c >= 27) & (c < 40)) + + # 6. Sandy clay loam (sand 45-80, clay 20-35, silt < 28). + _assign( + "sandy_clay_loam", + (s > 45) & (c >= 20) & (c < 35) & (si < 28), + ) + + # 7. Loam (clay 7-27, silt 28-50, sand 23-52). + _assign( + "loam", + (c >= 7) & (c < 27) & (si >= 28) & (si < 50) & (s <= 52), + ) + + # 8. Silt (silt >= 80, clay < 12). + _assign("silt", (si >= 80) & (c < 12)) + + # 9. Silt loam (silt >= 50 AND clay 12-27) OR (silt 50-80 AND clay < 12). + _assign( + "silt_loam", + (((si >= 50) & (c >= 12) & (c < 27)) + | ((si >= 50) & (si < 80) & (c < 12))), + ) + + # 10. Sandy loam — three USDA sub-rules combined: + # a) sand 43-52 AND silt < 50 AND clay 7-20 + # b) sand 52-70 AND clay <= 20 AND silt + 2*clay >= 30 + # c) sand 70-85 AND silt + 2*clay >= 30 + sandy_loam_a = (s >= 43) & (s < 52) & (si < 50) & (c >= 7) & (c < 20) + sandy_loam_b = (s >= 52) & (s < 70) & (c <= 20) & (si + 2 * c >= 30) + sandy_loam_c = (s >= 70) & (s < 85) & (si + 2 * c >= 30) + _assign("sandy_loam", sandy_loam_a | sandy_loam_b | sandy_loam_c) + + # 11. Loamy sand (sand 70-90, clay <= 15, silt + 2*clay < 30 + # AND silt + 1.5*clay >= 15). + _assign( + "loamy_sand", + (s >= 70) & (s < 90) & (c <= 15) + & (si + 2 * c < 30) & (si + 1.5 * c >= 15), + ) + + # 12. Sand (sand >= 85, silt + 1.5*clay < 15). + _assign("sand", (s >= 85) & (si + 1.5 * c < 15)) + + # Anything still unclassified is forced to loam (a textural fallback for + # combinations slightly outside any class boundary due to closure errors). + out[~done] = "loam" + + if scalar: + return str(out[0]) + return out diff --git a/tests/conftest.py b/tests/conftest.py index 0748bd6..17fc0a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ from floodpath.hydrology.models import FlowGrid, Hand, StreamNetwork from floodpath.landuse import WORLDCOVER_CLASSES from floodpath.landuse.models import LanduseGrid +from floodpath.soil.models import TextureGrid ROBIT_BATA_STREAM_THRESHOLD: int = 200 ROBIT_BATA_GHSL_FIXTURE: Path = ( @@ -30,6 +31,9 @@ ROBIT_BATA_WORLDCOVER_FIXTURE: Path = ( Path(__file__).resolve().parent / "fixtures" / "robit_bata_worldcover.tif" ) +ROBIT_BATA_SOIL_FIXTURE: Path = ( + Path(__file__).resolve().parent / "fixtures" / "robit_bata_soil.tif" +) ROBIT_BATA_FIXTURE: Path = Path(__file__).resolve().parent / "fixtures" / "robit_bata.tif" @@ -191,6 +195,37 @@ def robit_bata_worldcover() -> LanduseGrid: ) +@pytest.fixture(scope="session") +def robit_bata_soil() -> TextureGrid: + """Load the committed Robit Bata SoilGrids fixture (33x33 cells, ~250 m). + + Multiband float32 GeoTIFF with sand/silt/clay (% by mass) at the 0-30 cm + depth-weighted topsoil composition (default `depths`). Regenerate via + `python tests/fixtures/_generate_robit_bata_soil.py`. + """ + with rasterio.open(ROBIT_BATA_SOIL_FIXTURE) as src: + sand = src.read(1) + silt = src.read(2) + clay = src.read(3) + transform = src.transform + crs = str(src.crs) + bounds = src.bounds + return TextureGrid( + sand=sand, + silt=silt, + clay=clay, + transform=transform, + crs=crs, + bbox=BoundingBox( + min_lon=bounds.left, + min_lat=bounds.bottom, + max_lon=bounds.right, + max_lat=bounds.top, + ), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + @pytest.fixture(scope="session") def ne_brazil_dem() -> DEM: """Live DEM at (-5.0, -39.0) with the default buffer. diff --git a/tests/fixtures/_generate_robit_bata_soil.py b/tests/fixtures/_generate_robit_bata_soil.py new file mode 100644 index 0000000..c146a04 --- /dev/null +++ b/tests/fixtures/_generate_robit_bata_soil.py @@ -0,0 +1,79 @@ +"""Regenerate the Robit Bata SoilGrids 2.0 test fixture. + +Run when ISRIC reprocesses or we move test bboxes: + + python tests/fixtures/_generate_robit_bata_soil.py + +Stores sand / silt / clay (% by mass) as a 3-band float32 GeoTIFF at the +0-30 cm depth-weighted topsoil composition (default `depths`). +""" + +import sys +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parents[2] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +import numpy as np +import rasterio + +from floodpath.soil import get_soilgrids_texture +from floodpath.soil.constants import SOILGRIDS_DEFAULT_DEPTHS + +FIXTURE_PATH: Path = Path(__file__).resolve().parent / "robit_bata_soil.tif" +FIXTURE_LAT: float = 11.805 +FIXTURE_LON: float = 37.5625 +FIXTURE_BUFFER_DEG: float = 0.0375 +FIXTURE_DEPTHS: tuple[str, ...] = SOILGRIDS_DEFAULT_DEPTHS + + +def regenerate() -> None: + """Fetch the live SoilGrids patch and overwrite the on-disk fixture.""" + tex = get_soilgrids_texture( + lat=FIXTURE_LAT, + lon=FIXTURE_LON, + buffer_deg=FIXTURE_BUFFER_DEG, + depths=FIXTURE_DEPTHS, + ) + profile = { + "driver": "GTiff", + "height": tex.shape[0], + "width": tex.shape[1], + "count": 3, + "dtype": "float32", + "crs": tex.crs, + "transform": tex.transform, + "compress": "lzw", + "nodata": float("nan"), + } + with rasterio.open(FIXTURE_PATH, "w", **profile) as dst: + dst.write(tex.sand.astype(np.float32), 1) + dst.write(tex.silt.astype(np.float32), 2) + dst.write(tex.clay.astype(np.float32), 3) + dst.set_band_description(1, "sand") + dst.set_band_description(2, "silt") + dst.set_band_description(3, "clay") + size_kb = FIXTURE_PATH.stat().st_size / 1024 + valid = ~np.isnan(tex.sand) + print( + f"Wrote {FIXTURE_PATH} shape={tex.shape} " + f"depths={tex.depths} size={size_kb:.1f} KB" + ) + print( + f"Sand: {np.nanmean(tex.sand):.2f}% mean " + f"({np.nanmin(tex.sand):.2f}-{np.nanmax(tex.sand):.2f})" + ) + print( + f"Silt: {np.nanmean(tex.silt):.2f}% mean " + f"({np.nanmin(tex.silt):.2f}-{np.nanmax(tex.silt):.2f})" + ) + print( + f"Clay: {np.nanmean(tex.clay):.2f}% mean " + f"({np.nanmin(tex.clay):.2f}-{np.nanmax(tex.clay):.2f})" + ) + print(f"Valid cells: {int(valid.sum())}/{tex.sand.size}") + + +if __name__ == "__main__": + regenerate() diff --git a/tests/fixtures/robit_bata_soil.tif b/tests/fixtures/robit_bata_soil.tif new file mode 100644 index 0000000..3a36fa6 Binary files /dev/null and b/tests/fixtures/robit_bata_soil.tif differ diff --git a/tests/soil/__init__.py b/tests/soil/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/soil/test_constants.py b/tests/soil/test_constants.py new file mode 100644 index 0000000..70f6de7 --- /dev/null +++ b/tests/soil/test_constants.py @@ -0,0 +1,55 @@ +"""Sanity tests for the soil constants tables.""" + +import pytest + +from floodpath.soil import ( + HSG_CLASSES, + HSG_TO_INT, + INT_TO_HSG, + USDA_TEXTURE_CLASSES, + USDA_TEXTURE_TO_HSG, +) + + +class TestUsdaTextureClasses: + def test_twelve_canonical_classes(self) -> None: + assert len(USDA_TEXTURE_CLASSES) == 12 + # Names in the official USDA Soil Survey Manual order. + assert set(USDA_TEXTURE_CLASSES) == { + "sand", "loamy_sand", "sandy_loam", + "loam", "silt_loam", "silt", + "sandy_clay_loam", "clay_loam", "silty_clay_loam", + "sandy_clay", "silty_clay", "clay", + } + + def test_every_texture_class_has_an_hsg(self) -> None: + # NEH 630 Ch7 §630.0701 prose -> a textbook HSG for each class. + assert set(USDA_TEXTURE_TO_HSG.keys()) == set(USDA_TEXTURE_CLASSES) + + def test_each_hsg_appears_at_least_once(self) -> None: + # All four NEH groups must be reachable from at least one texture. + assert set(USDA_TEXTURE_TO_HSG.values()) == set(HSG_CLASSES) + + def test_clay_textures_are_d(self) -> None: + # The three clayey classes always map to D per §630.0701. + for cls in ("sandy_clay", "silty_clay", "clay"): + assert USDA_TEXTURE_TO_HSG[cls] == "D" + + def test_sand_and_loamy_sand_are_a(self) -> None: + # The most permeable classes always map to A. + assert USDA_TEXTURE_TO_HSG["sand"] == "A" + assert USDA_TEXTURE_TO_HSG["loamy_sand"] == "A" + + +class TestHsgEncoding: + def test_letter_to_int_round_trip(self) -> None: + for letter in HSG_CLASSES: + assert INT_TO_HSG[HSG_TO_INT[letter]] == letter + + def test_zero_is_reserved_for_nodata(self) -> None: + # 0 must not be assigned to any letter so categorical rasters can + # use it as a nodata sentinel. + assert 0 not in HSG_TO_INT.values() + + def test_int_codes_are_one_through_four(self) -> None: + assert sorted(HSG_TO_INT.values()) == [1, 2, 3, 4] diff --git a/tests/soil/test_fixture_regression.py b/tests/soil/test_fixture_regression.py new file mode 100644 index 0000000..66f3300 --- /dev/null +++ b/tests/soil/test_fixture_regression.py @@ -0,0 +1,44 @@ +"""Verify the committed SoilGrids fixture still matches a live ISRIC fetch. + +Mirrors the other module fixture regression tests: the single network-bound +contract guarding the offline soil tests. If ISRIC reprocesses SoilGrids +2.0 and the fixture drifts, this test fails first. +""" + +import numpy as np +import pytest + +from floodpath.soil import TextureGrid, get_soilgrids_texture +from tests.fixtures._generate_robit_bata_soil import ( + FIXTURE_BUFFER_DEG, + FIXTURE_DEPTHS, + FIXTURE_LAT, + FIXTURE_LON, +) + + +@pytest.mark.integration +class TestFixtureRegression: + def test_live_fetch_matches_committed_fixture( + self, + robit_bata_soil: TextureGrid, + ) -> None: + live = get_soilgrids_texture( + lat=FIXTURE_LAT, + lon=FIXTURE_LON, + buffer_deg=FIXTURE_BUFFER_DEG, + depths=FIXTURE_DEPTHS, + ) + assert live.shape == robit_bata_soil.shape + assert live.depths == robit_bata_soil.depths + # SoilGrids predictions are deterministic for a fixed product + # version, so values should round-trip exactly within float32. + np.testing.assert_allclose( + live.sand, robit_bata_soil.sand, rtol=0, atol=1e-3, equal_nan=True + ) + np.testing.assert_allclose( + live.silt, robit_bata_soil.silt, rtol=0, atol=1e-3, equal_nan=True + ) + np.testing.assert_allclose( + live.clay, robit_bata_soil.clay, rtol=0, atol=1e-3, equal_nan=True + ) diff --git a/tests/soil/test_hsg.py b/tests/soil/test_hsg.py new file mode 100644 index 0000000..20611d1 --- /dev/null +++ b/tests/soil/test_hsg.py @@ -0,0 +1,102 @@ +"""Unit tests for soil.hsg (texture_to_hsg).""" + +import numpy as np +import pytest +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox +from floodpath.soil import HSG_TO_INT, TextureGrid, texture_to_hsg + + +def _toy_texture(sand: list[list[float]], + silt: list[list[float]], + clay: list[list[float]]) -> TextureGrid: + sand_a = np.array(sand, dtype=np.float32) + silt_a = np.array(silt, dtype=np.float32) + clay_a = np.array(clay, dtype=np.float32) + return TextureGrid( + sand=sand_a, + silt=silt_a, + clay=clay_a, + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * sand_a.shape[1], 0.001 * sand_a.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +class TestTextureToHsg: + def test_canonical_quartet(self) -> None: + # Each cell = a textbook centroid for one HSG class. + # (sand, silt, clay) -> texture class -> HSG + # (92, 5, 3) -> sand -> A (1) + # (40, 40, 20) -> loam -> B (2) + # (30, 35, 35) -> clay_loam -> C (3) + # (10, 45, 45) -> silty_clay -> D (4) + tex = _toy_texture( + sand=[[92, 40], [30, 10]], + silt=[[ 5, 40], [35, 45]], + clay=[[ 3, 20], [35, 45]], + ) + hsg = texture_to_hsg(tex) + np.testing.assert_array_equal( + hsg.values, + np.array([[1, 2], [3, 4]], dtype=np.uint8), + ) + + def test_metadata_propagated(self) -> None: + tex = _toy_texture( + sand=[[40]], silt=[[40]], clay=[[20]], + ) + hsg = texture_to_hsg(tex) + assert hsg.transform == tex.transform + assert hsg.crs == tex.crs + assert hsg.bbox == tex.bbox + assert hsg.depths == tex.depths + + def test_dtype_is_uint8(self) -> None: + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + hsg = texture_to_hsg(tex) + assert hsg.values.dtype == np.uint8 + + def test_custom_mapping_overrides_defaults(self) -> None: + # Force every loam cell to D (used to model "compacted loam" sites). + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + hsg = texture_to_hsg(tex, mapping={"loam": "D"}) + # All other texture classes will fall to nodata since they're not in + # the override mapping; that's the expected behaviour for a strict + # custom table. + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + def test_invalid_hsg_letter_raises(self) -> None: + tex = _toy_texture(sand=[[40]], silt=[[40]], clay=[[20]]) + with pytest.raises(ValueError, match="not a valid HSG letter"): + texture_to_hsg(tex, mapping={"loam": "X"}) + + +class TestPropertyClayDominanceImpliesD: + """Any cell with >=45% clay AND silt-dominant texture lands in D.""" + + def test_silty_clay_is_d(self) -> None: + tex = _toy_texture(sand=[[10]], silt=[[45]], clay=[[45]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + def test_clay_is_d(self) -> None: + tex = _toy_texture(sand=[[10]], silt=[[20]], clay=[[70]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["D"] + + +class TestPropertySandDominanceImpliesA: + """Any cell that classifies to `sand` or `loamy_sand` lands in A.""" + + def test_pure_sand_is_a(self) -> None: + tex = _toy_texture(sand=[[100]], silt=[[0]], clay=[[0]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["A"] + + def test_loamy_sand_is_a(self) -> None: + tex = _toy_texture(sand=[[82]], silt=[[12]], clay=[[6]]) + hsg = texture_to_hsg(tex) + assert hsg.values[0, 0] == HSG_TO_INT["A"] diff --git a/tests/soil/test_models.py b/tests/soil/test_models.py new file mode 100644 index 0000000..9f5866a --- /dev/null +++ b/tests/soil/test_models.py @@ -0,0 +1,91 @@ +"""Unit tests for soil dataclasses (TextureGrid, HSGGrid).""" + +import numpy as np +import pytest +from rasterio.transform import Affine + +from floodpath.dem.models import BoundingBox +from floodpath.soil import HSGGrid, TextureGrid + + +def _toy_texture(sand: np.ndarray, silt: np.ndarray, clay: np.ndarray) -> TextureGrid: + return TextureGrid( + sand=sand.astype(np.float32), + silt=silt.astype(np.float32), + clay=clay.astype(np.float32), + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * sand.shape[1], 0.001 * sand.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +def _toy_hsg(values: np.ndarray) -> HSGGrid: + return HSGGrid( + values=values.astype(np.uint8), + transform=Affine.translation(0, 0) * Affine.scale(0.001, -0.001), + crs="EPSG:4326", + bbox=BoundingBox(0.0, 0.0, 0.001 * values.shape[1], 0.001 * values.shape[0]), + depths=("0-5cm", "5-15cm", "15-30cm"), + ) + + +class TestTextureGrid: + def test_shape_property(self) -> None: + tex = _toy_texture( + np.full((4, 4), 60.0), + np.full((4, 4), 25.0), + np.full((4, 4), 15.0), + ) + assert tex.shape == (4, 4) + + def test_closure_residual_zero_when_perfect(self) -> None: + tex = _toy_texture( + np.full((2, 2), 50.0), + np.full((2, 2), 30.0), + np.full((2, 2), 20.0), + ) + assert tex.closure_residual().max() == pytest.approx(0.0) + + def test_closure_residual_flags_imbalance(self) -> None: + # SoilGrids regressions can drift by ~1-2 pp; the residual surfaces it. + tex = _toy_texture( + np.full((2, 2), 50.0), + np.full((2, 2), 30.0), + np.full((2, 2), 22.0), # sum = 102 + ) + assert tex.closure_residual().max() == pytest.approx(2.0) + + +class TestHSGGrid: + def test_class_counts_skips_nodata(self) -> None: + # 1=A, 2=B, 3=C, 4=D, 0=nodata + hsg = _toy_hsg(np.array([[1, 2, 0], [3, 4, 1]])) + counts = hsg.class_counts() + assert counts == {"A": 2, "B": 1, "C": 1, "D": 1} + + def test_fraction(self) -> None: + hsg = _toy_hsg(np.array([[1, 1], [2, 3]])) + assert hsg.fraction("A") == pytest.approx(0.5) + assert hsg.fraction("B") == pytest.approx(0.25) + assert hsg.fraction("D") == 0.0 + + def test_dominant_class(self) -> None: + hsg = _toy_hsg(np.array([[3, 3, 3], [3, 4, 4]])) + assert hsg.dominant_class() == "C" + + def test_dominant_class_raises_on_empty(self) -> None: + hsg = _toy_hsg(np.zeros((2, 2))) # all nodata + with pytest.raises(ValueError, match="no classified cells"): + hsg.dominant_class() + + def test_as_letter_grid(self) -> None: + hsg = _toy_hsg(np.array([[1, 2], [3, 4]])) + letters = hsg.as_letter_grid() + np.testing.assert_array_equal(letters, np.array([["A", "B"], ["C", "D"]])) + + def test_as_letter_grid_nodata_is_empty(self) -> None: + hsg = _toy_hsg(np.array([[1, 0]])) + letters = hsg.as_letter_grid() + assert letters[0, 0] == "A" + assert letters[0, 1] == "" diff --git a/tests/soil/test_soilgrids.py b/tests/soil/test_soilgrids.py new file mode 100644 index 0000000..0e05ff5 --- /dev/null +++ b/tests/soil/test_soilgrids.py @@ -0,0 +1,119 @@ +"""Unit + integration tests for soil.soilgrids.""" + +import numpy as np +import pytest + +from floodpath.soil import ( + SOILGRIDS_DEFAULT_DEPTHS, + TextureGrid, + get_soilgrids_texture, + texture_to_hsg, +) +from floodpath.soil.soilgrids import soilgrids_vrt_url + + +class TestUrlBuilder: + def test_default_statistic_is_mean(self) -> None: + url = soilgrids_vrt_url("sand", "5-15cm") + assert url.endswith("sand_5-15cm_mean.vrt") + + def test_custom_statistic(self) -> None: + url = soilgrids_vrt_url("clay", "0-5cm", statistic="Q0.5") + assert url.endswith("clay_0-5cm_Q0.5.vrt") + + def test_invalid_depth_raises(self) -> None: + with pytest.raises(ValueError, match="depth must be one of"): + soilgrids_vrt_url("sand", "999cm") + + +class TestGetSoilgridsOffline: + def test_empty_depths_raises(self) -> None: + with pytest.raises(ValueError, match="at least one slice"): + get_soilgrids_texture(11.805, 37.5625, depths=()) + + def test_invalid_depth_raises(self) -> None: + with pytest.raises(ValueError, match="not in published SoilGrids"): + get_soilgrids_texture(11.805, 37.5625, depths=("999cm",)) + + +class TestRobitBataSoilFixture: + """Pinned values for the committed Robit Bata SoilGrids fixture.""" + + def test_shape_and_metadata(self, robit_bata_soil: TextureGrid) -> None: + # 250 m SoilGrids cells over a 0.075 deg patch -> ~33x33. + assert robit_bata_soil.shape == (33, 33) + assert robit_bata_soil.depths == ("0-5cm", "5-15cm", "15-30cm") + assert robit_bata_soil.crs == "EPSG:4326" + + def test_texture_means_pinned(self, robit_bata_soil: TextureGrid) -> None: + # Pinned: 0-30 cm topsoil composition for the Robit Bata bbox + # (Vertisol-rich Ethiopian highlands -> very clay-heavy). + assert np.nanmean(robit_bata_soil.sand) == pytest.approx(15.40, abs=0.05) + assert np.nanmean(robit_bata_soil.silt) == pytest.approx(30.85, abs=0.05) + assert np.nanmean(robit_bata_soil.clay) == pytest.approx(53.75, abs=0.05) + + def test_clay_dominates(self, robit_bata_soil: TextureGrid) -> None: + # The patch sits in a Vertisol belt — clay must dominate the + # texture composition. + assert np.nanmean(robit_bata_soil.clay) > np.nanmean(robit_bata_soil.silt) + assert np.nanmean(robit_bata_soil.clay) > np.nanmean(robit_bata_soil.sand) + + def test_closure_within_tolerance(self, robit_bata_soil: TextureGrid) -> None: + # SoilGrids regressions can drift; max closure residual stays small. + residual = robit_bata_soil.closure_residual() + assert np.nanmax(residual) < 1.0 + + def test_valid_cell_count_pinned(self, robit_bata_soil: TextureGrid) -> None: + # Pinned: 1088/1089 cells valid (one edge cell is nodata after the + # Goode -> WGS84 reprojection corner). + valid = (~np.isnan(robit_bata_soil.sand)).sum() + assert int(valid) == 1088 + + def test_hsg_is_uniformly_d(self, robit_bata_soil: TextureGrid) -> None: + # With ~54% mean clay, every classified cell falls in HSG D + # (clayey textures per NEH 630 Ch7 §630.0701). + hsg = texture_to_hsg(robit_bata_soil) + counts = hsg.class_counts() + assert set(counts.keys()) == {"D"} + assert counts["D"] == 1088 + + +@pytest.mark.integration +class TestGetSoilgridsIntegration: + """Live ISRIC fetch on a small bbox. + + Uses a tiny patch over Djibouti to keep the integration footprint small + while exercising the full /vsicurl/ + WarpedVRT + parallel fetch path. + """ + + def test_djibouti_default_depths_returns_texture(self) -> None: + tex = get_soilgrids_texture( + lat=11.59, + lon=43.15, + buffer_deg=0.05, + depths=SOILGRIDS_DEFAULT_DEPTHS, + ) + assert tex.depths == SOILGRIDS_DEFAULT_DEPTHS + assert tex.crs == "EPSG:4326" + # Some cells must have valid texture predictions. + assert (~np.isnan(tex.sand)).any() + # Closure: where data exists, sand+silt+clay should sum near 100. + valid = ( + ~np.isnan(tex.sand) + & ~np.isnan(tex.silt) + & ~np.isnan(tex.clay) + ) + residual = np.abs( + tex.sand[valid] + tex.silt[valid] + tex.clay[valid] - 100.0 + ) + assert residual.max() < 5.0 # SoilGrids regressions drift up to a few % + + def test_single_depth_works(self) -> None: + tex = get_soilgrids_texture( + lat=11.59, + lon=43.15, + buffer_deg=0.05, + depths=("5-15cm",), + ) + assert tex.depths == ("5-15cm",) + assert tex.shape[0] > 0 and tex.shape[1] > 0 diff --git a/tests/soil/test_utils.py b/tests/soil/test_utils.py new file mode 100644 index 0000000..31e90b4 --- /dev/null +++ b/tests/soil/test_utils.py @@ -0,0 +1,98 @@ +"""Unit tests for the USDA texture-triangle classifier.""" + +import numpy as np +import pytest + +from floodpath.soil import ( + USDA_TEXTURE_CLASSES, + USDA_TEXTURE_TO_HSG, + usda_texture_class, +) + + +# Textbook centroid (sand, silt, clay) for each of the 12 USDA classes, +# from the NRCS online texture calculator. These are well inside their +# respective polygons in the texture triangle and should round-trip exactly. +CANONICAL_CENTROIDS: tuple[tuple[str, int, int, int], ...] = ( + ("sand", 92, 5, 3), + ("loamy_sand", 82, 12, 6), + ("sandy_loam", 65, 25, 10), + ("loam", 40, 40, 20), + ("silt_loam", 20, 65, 15), + ("silt", 10, 85, 5), + ("sandy_clay_loam", 60, 10, 30), + ("clay_loam", 30, 35, 35), + ("silty_clay_loam", 10, 55, 35), + ("sandy_clay", 50, 5, 45), + ("silty_clay", 10, 45, 45), + ("clay", 20, 20, 60), +) + + +class TestScalarCentroids: + @pytest.mark.parametrize("expected,sand,silt,clay", CANONICAL_CENTROIDS) + def test_each_centroid_classifies_correctly( + self, + expected: str, + sand: int, + silt: int, + clay: int, + ) -> None: + assert usda_texture_class(sand, silt, clay) == expected + + +class TestVectorisedClassifier: + def test_array_input_returns_array(self) -> None: + sand = np.array([s for _, s, _, _ in CANONICAL_CENTROIDS]) + silt = np.array([si for _, _, si, _ in CANONICAL_CENTROIDS]) + clay = np.array([c for _, _, _, c in CANONICAL_CENTROIDS]) + out = usda_texture_class(sand, silt, clay) + assert out.shape == sand.shape + for cls, _, _, _ in CANONICAL_CENTROIDS: + assert cls in out + + def test_2d_grid(self) -> None: + # 2x2 grid: pure sand, loam, clay, silty clay. + sand = np.array([[100, 40], [ 20, 10]], dtype=float) + silt = np.array([[ 0, 40], [ 20, 45]], dtype=float) + clay = np.array([[ 0, 20], [ 60, 45]], dtype=float) + out = usda_texture_class(sand, silt, clay) + assert out.shape == (2, 2) + assert out[0, 0] == "sand" + assert out[0, 1] == "loam" + assert out[1, 0] == "clay" + assert out[1, 1] == "silty_clay" + + +class TestEdgeCases: + def test_pure_sand(self) -> None: + assert usda_texture_class(100, 0, 0) == "sand" + + def test_pure_clay(self) -> None: + assert usda_texture_class(0, 0, 100) == "clay" + + def test_pure_silt(self) -> None: + # 100% silt: silt >= 80 AND clay < 12 -> "silt". + assert usda_texture_class(0, 100, 0) == "silt" + + def test_unclassifiable_falls_back_to_loam(self) -> None: + # An (off-triangle) input with all rules unmet falls through to loam. + # E.g. a clearly-impossible combo of moderate sand + huge silt. + # Pick (40, 35, 5) — barely outside loam (silt < 28) and outside + # silt loam (silt < 50) and outside sandy loam rule (sand < 43). + cls = usda_texture_class(40, 35, 5) + # The point should map to *some* defined class — fallback ensures + # we never return None or a non-USDA name. + assert cls in USDA_TEXTURE_CLASSES + + +class TestEveryClassIsReachable: + """Smoke check: each USDA class can be produced by some plausible input.""" + + def test_every_class_appears_in_centroid_set(self) -> None: + seen = {expected for expected, *_ in CANONICAL_CENTROIDS} + assert seen == set(USDA_TEXTURE_CLASSES) + + def test_centroid_hsgs_span_all_four_groups(self) -> None: + hsgs = {USDA_TEXTURE_TO_HSG[expected] for expected, *_ in CANONICAL_CENTROIDS} + assert hsgs == {"A", "B", "C", "D"}