diff --git a/docs/source/concepts/spatial.rst b/docs/source/concepts/spatial.rst index 8b7a6e2..728e009 100644 --- a/docs/source/concepts/spatial.rst +++ b/docs/source/concepts/spatial.rst @@ -25,6 +25,28 @@ parameter can be used to specify the aggregation method. The default is `mean`. :no-index: +Using a bounding box instead of a geometry +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Both :func:`mask` and :func:`reduce` accept an ``area`` keyword argument as an +alternative to providing a ``geodataframe``. ``area`` is a dictionary with keys +``"north"``, ``"south"``, ``"east"`` and ``"west"`` that defines a simple +bounding box: + +.. code-block:: python + + ekt.spatial.reduce( + ds, area={"north": 60, "south": 30, "east": 40, "west": -10}, how="mean" + ) + +The bounding box is converted internally to a single-polygon GeoDataFrame. +Providing both ``area`` and ``geodataframe`` raises a ``ValueError``. + +.. note:: + Areas that cross the anti-meridian (where ``west > east``) are not currently + supported. + + In addition to the above functions, the spatial module also includes several methods for computing the intermedieate steps of the aggregation process. These methods are documented in the API reference guide: :doc:`../autodocs/earthkit.transforms.spatial` diff --git a/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb new file mode 100644 index 0000000..ecd905a --- /dev/null +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb @@ -0,0 +1,60 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate the spatial mean of gridded data over an area/bounding box\n", + "\n", + "This notebook demonstrates how to compute the spatial mean of some sample 2m\n", + "temperature data for a given rectangular area bounding box.\n", + "This is done via the `area` keyword argument, it is a dictionary with keys\n", + "`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from earthkit import data as ekd\n", + "from earthkit import transforms as ekt\n", + "from earthkit.transforms._tools import earthkit_remote_test_data_file\n", + "\n", + "remote_era5_file = earthkit_remote_test_data_file(\"era5-Europe-sfc-2m-temperature-3deg-2015-2017.grib\")\n", + "era5_data = ekd.from_source(\"url\", remote_era5_file)\n", + "ds = era5_data.to_xarray()\n", + "\n", + "# Define a bounding box covering central Europe\n", + "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", + "\n", + "# Calculate area mean using the bounding box\n", + "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", + "\n", + "area_mean_bbox" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rtd-dev-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb similarity index 81% rename from docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb rename to docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb index a67b9bd..48c57df 100644 --- a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Calculate the Area Mean of Gridded Data over a Geometries\n", + "# Calculate the spatial mean of gridded data over geometry data\n", "\n", - "This notebook demonstrates how to compute the area mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" + "This notebook demonstrates how to compute the spatial mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" ] }, { diff --git a/docs/source/how-tos/spatial/index.rst b/docs/source/how-tos/spatial/index.rst index 8a9c5ef..a48ccf3 100644 --- a/docs/source/how-tos/spatial/index.rst +++ b/docs/source/how-tos/spatial/index.rst @@ -11,4 +11,5 @@ subsets of data. :maxdepth: 1 howto_mask_era5_with_geometry.ipynb - howto_area_mean_era5_geometry.ipynb + howto_spatial_mean_era5_geometry.ipynb + howto_spatial_mean_era5_area.ipynb diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 74f685b..e9e5338 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import logging import typing as T from copy import deepcopy @@ -151,7 +152,7 @@ def mask_contains_points( ) # get spatial dims and create output array: - spatial_dims = list(set(lat_dims + lon_dims)) + spatial_dims = list(dict.fromkeys(lat_dims + lon_dims)) outdata_shape = tuple(len(coords[dim]) for dim in spatial_dims) outdata = xp.full(outdata_shape, xp.nan) # loop over shapes and mask any point that is in the shape @@ -289,15 +290,82 @@ def get_mask_dim_index( return mask_dim_index +def _area_to_geodataframe(area: dict) -> gpd.GeoDataFrame: + """Convert an area dictionary to a GeoDataFrame with a bounding box polygon. + + Parameters + ---------- + area : dict + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining the bounding box. + + Returns + ------- + gpd.GeoDataFrame + A GeoDataFrame containing a single bounding-box polygon. + + """ + from shapely.geometry import box + + required_keys = {"north", "south", "east", "west"} + missing = required_keys - set(area) + if missing: + raise ValueError(f"area dictionary is missing required keys: {missing}") + + if area["west"] > area["east"]: + raise ValueError("Areas that cross the anti-meridian (where 'west' > 'east') are not currently supported.") + + polygon = box(area["west"], area["south"], area["east"], area["north"]) + return gpd.GeoDataFrame(geometry=[polygon]) + + +def area_to_geodataframe_decorator(func): + """Decorator that converts an ``area`` kwarg to a ``geodataframe`` kwarg. + + If ``area`` is provided as a dictionary with keys + ``{"north", "south", "east", "west"}``, it is converted to a + `geopandas.GeoDataFrame` with a single bounding-box polygon and passed + as the ``geodataframe`` argument. + + Raises ``ValueError`` if both ``area`` and ``geodataframe`` are provided. + + .. note:: + Areas that cross the anti-meridian (where ``west > east``) are not + currently supported. + """ + + @functools.wraps(func) + def wrapper(*args, area: dict | None = None, **kwargs): + positional_geodataframe = args[1] if len(args) >= 2 else None + keyword_geodataframe = kwargs.get("geodataframe") + + if area is not None and (positional_geodataframe is not None or keyword_geodataframe is not None): + raise ValueError("Only one of 'area' or 'geodataframe' may be provided, not both.") + + if area is not None: + area_geodataframe = _area_to_geodataframe(area) + if len(args) >= 2: + args = (args[0], area_geodataframe, *args[2:]) + kwargs.pop("geodataframe", None) + else: + kwargs["geodataframe"] = area_geodataframe + + return func(*args, **kwargs) + + return wrapper + + @format_handler() +@area_to_geodataframe_decorator def mask( dataarray: xr.Dataset | xr.DataArray, - geodataframe: gpd.geodataframe.GeoDataFrame, + geodataframe: gpd.geodataframe.GeoDataFrame | None = None, mask_dim: str | None = None, lat_key: str | None = None, lon_key: str | None = None, chunk: bool = True, union_geometries: bool = False, + area: dict | None = None, **mask_kwargs, ) -> xr.Dataset | xr.DataArray: """Apply multiple shape masks to some gridded data. @@ -313,7 +381,13 @@ def mask( dataarray : Xarray data object (must have geospatial coordinates). geodataframe : - Geopandas Dataframe containing the polygons for aggregations + Geopandas Dataframe containing the polygons for aggregations. + Either ``geodataframe`` or ``area`` must be provided, but not both. + area : dict, optional + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining a bounding box. Converted to a single-polygon GeoDataFrame + internally. Areas that cross the anti-meridian (``west > east``) are + not currently supported. mask_dim : dimension that will be created to accommodate the masked arrays, default is the index of the geodataframe @@ -342,6 +416,8 @@ def mask( Each slice of layer corresponds to a feature in layer. """ + if geodataframe is None: + raise ValueError("Either 'geodataframe' or 'area' must be provided.") spatial_info = get_spatial_info(dataarray, lat_key=lat_key, lon_key=lon_key) # Get spatial info required by mask functions: mask_kwargs = {**mask_kwargs, **{key: spatial_info[key] for key in ["lat_key", "lon_key", "regular"]}} @@ -375,10 +451,12 @@ def mask( @format_handler() +@area_to_geodataframe_decorator def reduce( dataarray: xr.Dataset | xr.DataArray, geodataframe: gpd.GeoDataFrame | None = None, mask_arrays: xr.DataArray | list[xr.DataArray] | None = None, + area: dict | None = None, **kwargs, ) -> xr.Dataset | xr.DataArray: """Apply a shape object to an xarray.DataArray object using the specified 'how' method. @@ -390,7 +468,13 @@ def reduce( dataarray : Xarray data object (must have geospatial coordinates). geodataframe : - Geopandas Dataframe containing the polygons for aggregations + Geopandas Dataframe containing the polygons for aggregations. + Cannot be provided together with ``area``. + area : dict, optional + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining a bounding box. Converted to a single-polygon GeoDataFrame + internally. Areas that cross the anti-meridian (``west > east``) are + not currently supported. mask_arrays : precomputed mask array[s], if provided this will be used instead of creating a new mask. They must be on the same spatial grid as the dataarray. diff --git a/tests/test_30_spatial.py b/tests/test_30_spatial.py index 9dfcf76..bf2200d 100644 --- a/tests/test_30_spatial.py +++ b/tests/test_30_spatial.py @@ -965,3 +965,106 @@ def test_spatial_reduce_precomputed_mask_irr_list_two_masks(convention): result = spatial.reduce(_MCP_DA, mask_arrays=[mask_a, mask_b], how="mean", lat_key="latitude", lon_key="longitude") np.testing.assert_allclose(result.isel(index=0).item(), 3.0) np.testing.assert_allclose(result.isel(index=1).item(), 1.0) + + +# --------------------------------------------------------------------------- +# area kwarg tests — no network required +# --------------------------------------------------------------------------- + +_AREA_FULL = {"north": 90, "south": 0, "east": 90, "west": 0} + + +def test_area_to_geodataframe_creates_geodataframe(): + """_area_to_geodataframe returns a GeoDataFrame with a single polygon.""" + gdf = _spatial._area_to_geodataframe(_AREA_FULL) + assert isinstance(gdf, gpd.GeoDataFrame) + assert len(gdf) == 1 + bounds = gdf.geometry[0].bounds # (minx, miny, maxx, maxy) + assert bounds == (0, 0, 90, 90) + + +def test_area_to_geodataframe_missing_keys(): + """_area_to_geodataframe raises ValueError if keys are missing.""" + with pytest.raises(ValueError, match="missing required keys"): + _spatial._area_to_geodataframe({"north": 90, "south": 0}) + + +def test_reduce_with_area_kwarg(): + """spatial.reduce with area= returns correct result.""" + # SAMPLE_ARRAY: lat=[0,60,90], lon=[0,30,60,90], values = rows of 1,2,3 + # area covers lat=0 and lat=60 (mean of 1 and 2 = 1.5) + # Note: shapely contains_xy excludes points on the boundary of the polygon, + # so use north/east=90 to exclude the lat=90/lon=90 boundary points. + area = {"north": 90, "south": -1, "east": 90, "west": -1} + result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean") + assert isinstance(result, xr.DataArray) + np.testing.assert_allclose(result.item(), 1.5) + + +def test_reduce_with_area_subset(): + """spatial.reduce with area= covering a subset returns correct mean.""" + # Only lat=60 (row with value 2), all longitudes + # shapely.contains_xy is strict (boundary excluded), so use 59-61 range + area = {"north": 61, "south": 59, "east": 91, "west": -1} + result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean") + np.testing.assert_allclose(result.item(), 2.0) + + +def test_mask_with_area_kwarg(): + """spatial.mask with area= returns masked data.""" + area = {"north": 91, "south": 59, "east": 91, "west": -1} + result = spatial.mask(SAMPLE_ARRAY, area=area, chunk=False) + assert isinstance(result, xr.DataArray) + # lat=0 row should be all NaN (outside area) + assert np.all(np.isnan(result.sel(latitude=0).values)) + # lat=60 row should be preserved + assert np.all(result.sel(latitude=60).values == 2) + + +def test_area_and_geodataframe_raises_reduce(): + """spatial.reduce raises ValueError if both area and geodataframe are provided.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.reduce(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL, how="mean") + + +def test_area_and_geodataframe_raises_mask(): + """spatial.mask raises ValueError if both area and geodataframe are provided.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.mask(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL) + + +def test_area_and_positional_geodataframe_raises_reduce(): + """spatial.reduce raises ValueError if area and positional geodataframe are both given.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.reduce(SAMPLE_ARRAY, gdf, area=_AREA_FULL, how="mean") + + +def test_area_and_positional_geodataframe_raises_mask(): + """spatial.mask raises ValueError if area and positional geodataframe are both given.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.mask(SAMPLE_ARRAY, gdf, area=_AREA_FULL) + + +def test_mask_no_geodataframe_no_area_raises(): + """spatial.mask raises ValueError if neither geodataframe nor area is provided.""" + with pytest.raises(ValueError, match="Either"): + spatial.mask(SAMPLE_ARRAY) + + +def test_area_none_passthrough_reduce(): + """spatial.reduce with area=None (default) behaves normally.""" + result = spatial.reduce(SAMPLE_ARRAY, how="mean") + np.testing.assert_allclose(result.item(), 2.0) + + +def test_reduce_dataset_with_area(): + """spatial.reduce on a Dataset with area= returns a Dataset.""" + ds = xr.Dataset({"var": SAMPLE_ARRAY}) + area = {"north": 91, "south": -1, "east": 91, "west": -1} + result = spatial.reduce(ds, area=area, how="mean") + assert isinstance(result, xr.Dataset) + assert "var" in result