From 3e24905905ff48f6e83307815b7c9e12edda2b23 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 15 May 2026 10:54:23 +0100 Subject: [PATCH 1/9] area-as-input --- docs/source/concepts/spatial.rst | 22 ++++ .../howto_area_mean_era5_geometry.ipynb | 26 +++++ src/earthkit/transforms/spatial/_aggregate.py | 82 +++++++++++++- tests/test_30_spatial.py | 102 ++++++++++++++++++ 4 files changed, 229 insertions(+), 3 deletions(-) diff --git a/docs/source/concepts/spatial.rst b/docs/source/concepts/spatial.rst index 8b7a6e2d..728e009e 100644 --- a/docs/source/concepts/spatial.rst +++ b/docs/source/concepts/spatial.rst @@ -25,6 +25,28 @@ parameter can be used to specify the aggregation method. The default is `mean`. :no-index: +Using a bounding box instead of a geometry +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Both :func:`mask` and :func:`reduce` accept an ``area`` keyword argument as an +alternative to providing a ``geodataframe``. ``area`` is a dictionary with keys +``"north"``, ``"south"``, ``"east"`` and ``"west"`` that defines a simple +bounding box: + +.. code-block:: python + + ekt.spatial.reduce( + ds, area={"north": 60, "south": 30, "east": 40, "west": -10}, how="mean" + ) + +The bounding box is converted internally to a single-polygon GeoDataFrame. +Providing both ``area`` and ``geodataframe`` raises a ``ValueError``. + +.. note:: + Areas that cross the anti-meridian (where ``west > east``) are not currently + supported. + + In addition to the above functions, the spatial module also includes several methods for computing the intermedieate steps of the aggregation process. These methods are documented in the API reference guide: :doc:`../autodocs/earthkit.transforms.spatial` diff --git a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb index a67b9bd0..54ec6a2c 100644 --- a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb +++ b/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb @@ -32,6 +32,32 @@ "area_mean" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using a bounding box instead of a geometry\n", + "\n", + "Instead of loading a geometry file, you can define the region of interest as a\n", + "simple bounding box using the `area` keyword argument. This is a dictionary with\n", + "keys `\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define a bounding box covering central Europe\n", + "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", + "\n", + "# Calculate area mean using the bounding box\n", + "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", + "\n", + "area_mean_bbox" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 74f685b5..5215f662 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import logging import typing as T from copy import deepcopy @@ -289,10 +290,70 @@ def get_mask_dim_index( return mask_dim_index +def _area_to_geodataframe(area: dict) -> gpd.GeoDataFrame: + """Convert an area dictionary to a GeoDataFrame with a bounding box polygon. + + Parameters + ---------- + area : dict + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining the bounding box. + + Returns + ------- + gpd.GeoDataFrame + A GeoDataFrame containing a single bounding-box polygon. + + """ + from shapely.geometry import box + + required_keys = {"north", "south", "east", "west"} + missing = required_keys - set(area) + if missing: + raise ValueError(f"area dictionary is missing required keys: {missing}") + + polygon = box(area["west"], area["south"], area["east"], area["north"]) + return gpd.GeoDataFrame(geometry=[polygon]) + + +def area_to_geodataframe_decorator(func): + """Decorator that converts an ``area`` kwarg to a ``geodataframe`` kwarg. + + If ``area`` is provided as a dictionary with keys + ``{"north", "south", "east", "west"}``, it is converted to a + `geopandas.GeoDataFrame` with a single bounding-box polygon and passed + as the ``geodataframe`` argument. + + Raises ``ValueError`` if both ``area`` and ``geodataframe`` are provided. + + .. note:: + Areas that cross the anti-meridian (where ``west > east``) are not + currently supported. + """ + + @functools.wraps(func) + def wrapper(*args, area: dict | None = None, **kwargs): + geodataframe = kwargs.get("geodataframe") + # Also check if geodataframe was passed as a positional arg (2nd arg) + if len(args) >= 2: + geodataframe = args[1] + + if area is not None and geodataframe is not None: + raise ValueError("Only one of 'area' or 'geodataframe' may be provided, not both.") + + if area is not None: + kwargs["geodataframe"] = _area_to_geodataframe(area) + + return func(*args, **kwargs) + + return wrapper + + @format_handler() +@area_to_geodataframe_decorator def mask( dataarray: xr.Dataset | xr.DataArray, - geodataframe: gpd.geodataframe.GeoDataFrame, + geodataframe: gpd.geodataframe.GeoDataFrame | None = None, mask_dim: str | None = None, lat_key: str | None = None, lon_key: str | None = None, @@ -313,7 +374,13 @@ def mask( dataarray : Xarray data object (must have geospatial coordinates). geodataframe : - Geopandas Dataframe containing the polygons for aggregations + Geopandas Dataframe containing the polygons for aggregations. + Either ``geodataframe`` or ``area`` must be provided, but not both. + area : dict, optional + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining a bounding box. Converted to a single-polygon GeoDataFrame + internally. Areas that cross the anti-meridian (``west > east``) are + not currently supported. mask_dim : dimension that will be created to accommodate the masked arrays, default is the index of the geodataframe @@ -342,6 +409,8 @@ def mask( Each slice of layer corresponds to a feature in layer. """ + if geodataframe is None: + raise ValueError("Either 'geodataframe' or 'area' must be provided.") spatial_info = get_spatial_info(dataarray, lat_key=lat_key, lon_key=lon_key) # Get spatial info required by mask functions: mask_kwargs = {**mask_kwargs, **{key: spatial_info[key] for key in ["lat_key", "lon_key", "regular"]}} @@ -375,6 +444,7 @@ def mask( @format_handler() +@area_to_geodataframe_decorator def reduce( dataarray: xr.Dataset | xr.DataArray, geodataframe: gpd.GeoDataFrame | None = None, @@ -390,7 +460,13 @@ def reduce( dataarray : Xarray data object (must have geospatial coordinates). geodataframe : - Geopandas Dataframe containing the polygons for aggregations + Geopandas Dataframe containing the polygons for aggregations. + Cannot be provided together with ``area``. + area : dict, optional + Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"`` + defining a bounding box. Converted to a single-polygon GeoDataFrame + internally. Areas that cross the anti-meridian (``west > east``) are + not currently supported. mask_arrays : precomputed mask array[s], if provided this will be used instead of creating a new mask. They must be on the same spatial grid as the dataarray. diff --git a/tests/test_30_spatial.py b/tests/test_30_spatial.py index 9dfcf763..5cfdaa79 100644 --- a/tests/test_30_spatial.py +++ b/tests/test_30_spatial.py @@ -965,3 +965,105 @@ def test_spatial_reduce_precomputed_mask_irr_list_two_masks(convention): result = spatial.reduce(_MCP_DA, mask_arrays=[mask_a, mask_b], how="mean", lat_key="latitude", lon_key="longitude") np.testing.assert_allclose(result.isel(index=0).item(), 3.0) np.testing.assert_allclose(result.isel(index=1).item(), 1.0) + + +# --------------------------------------------------------------------------- +# area kwarg tests — no network required +# --------------------------------------------------------------------------- + +_AREA_FULL = {"north": 90, "south": 0, "east": 90, "west": 0} + + +def test_area_to_geodataframe_creates_geodataframe(): + """_area_to_geodataframe returns a GeoDataFrame with a single polygon.""" + gdf = _spatial._area_to_geodataframe(_AREA_FULL) + assert isinstance(gdf, gpd.GeoDataFrame) + assert len(gdf) == 1 + bounds = gdf.geometry[0].bounds # (minx, miny, maxx, maxy) + assert bounds == (0, 0, 90, 90) + + +def test_area_to_geodataframe_missing_keys(): + """_area_to_geodataframe raises ValueError if keys are missing.""" + with pytest.raises(ValueError, match="missing required keys"): + _spatial._area_to_geodataframe({"north": 90, "south": 0}) + + +def test_reduce_with_area_kwarg(): + """spatial.reduce with area= returns correct result.""" + # SAMPLE_ARRAY: lat=[0,60,90], lon=[0,30,60,90], values = rows of 1,2,3 + # area covers lat=0 and lat=60 (mean of 1 and 2 = 1.5) + # Note: shapely contains_xy excludes points on the boundary of the polygon + area = {"north": 91, "south": -1, "east": 91, "west": -1} + result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean") + assert isinstance(result, xr.DataArray) + np.testing.assert_allclose(result.item(), 1.5) + + +def test_reduce_with_area_subset(): + """spatial.reduce with area= covering a subset returns correct mean.""" + # Only lat=60 (row with value 2), all longitudes + # shapely.contains_xy is strict (boundary excluded), so use 59-61 range + area = {"north": 61, "south": 59, "east": 91, "west": -1} + result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean") + np.testing.assert_allclose(result.item(), 2.0) + + +def test_mask_with_area_kwarg(): + """spatial.mask with area= returns masked data.""" + area = {"north": 91, "south": 59, "east": 91, "west": -1} + result = spatial.mask(SAMPLE_ARRAY, area=area, chunk=False) + assert isinstance(result, xr.DataArray) + # lat=0 row should be all NaN (outside area) + assert np.all(np.isnan(result.sel(latitude=0).values)) + # lat=60 row should be preserved + assert np.all(result.sel(latitude=60).values == 2) + + +def test_area_and_geodataframe_raises_reduce(): + """spatial.reduce raises ValueError if both area and geodataframe are provided.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.reduce(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL, how="mean") + + +def test_area_and_geodataframe_raises_mask(): + """spatial.mask raises ValueError if both area and geodataframe are provided.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.mask(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL) + + +def test_area_and_positional_geodataframe_raises_reduce(): + """spatial.reduce raises ValueError if area and positional geodataframe are both given.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.reduce(SAMPLE_ARRAY, gdf, area=_AREA_FULL, how="mean") + + +def test_area_and_positional_geodataframe_raises_mask(): + """spatial.mask raises ValueError if area and positional geodataframe are both given.""" + gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])]) + with pytest.raises(ValueError, match="Only one of"): + spatial.mask(SAMPLE_ARRAY, gdf, area=_AREA_FULL) + + +def test_mask_no_geodataframe_no_area_raises(): + """spatial.mask raises ValueError if neither geodataframe nor area is provided.""" + with pytest.raises(ValueError, match="Either"): + spatial.mask(SAMPLE_ARRAY) + + +def test_area_none_passthrough_reduce(): + """spatial.reduce with area=None (default) behaves normally.""" + result = spatial.reduce(SAMPLE_ARRAY, how="mean") + np.testing.assert_allclose(result.item(), 2.0) + + +def test_reduce_dataset_with_area(): + """spatial.reduce on a Dataset with area= returns a Dataset.""" + ds = xr.Dataset({"var": SAMPLE_ARRAY}) + area = {"north": 91, "south": -1, "east": 91, "west": -1} + result = spatial.reduce(ds, area=area, how="mean") + assert isinstance(result, xr.Dataset) + assert "var" in result From 40b45546059144dbf7dce4f6639505c8432a73b5 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 15 May 2026 11:00:30 +0100 Subject: [PATCH 2/9] notebooko updates --- ...ynb => howto_spatial_mean_era5_area.ipynb} | 30 +--------- .../howto_spatial_mean_era5_geometry.ipynb | 60 +++++++++++++++++++ docs/source/how-tos/spatial/index.rst | 3 +- 3 files changed, 64 insertions(+), 29 deletions(-) rename docs/source/how-tos/spatial/{howto_area_mean_era5_geometry.ipynb => howto_spatial_mean_era5_area.ipynb} (56%) create mode 100644 docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb diff --git a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb similarity index 56% rename from docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb rename to docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb index 54ec6a2c..48c57dfc 100644 --- a/docs/source/how-tos/spatial/howto_area_mean_era5_geometry.ipynb +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Calculate the Area Mean of Gridded Data over a Geometries\n", + "# Calculate the spatial mean of gridded data over geometry data\n", "\n", - "This notebook demonstrates how to compute the area mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" + "This notebook demonstrates how to compute the spatial mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" ] }, { @@ -32,32 +32,6 @@ "area_mean" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using a bounding box instead of a geometry\n", - "\n", - "Instead of loading a geometry file, you can define the region of interest as a\n", - "simple bounding box using the `area` keyword argument. This is a dictionary with\n", - "keys `\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define a bounding box covering central Europe\n", - "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", - "\n", - "# Calculate area mean using the bounding box\n", - "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", - "\n", - "area_mean_bbox" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb new file mode 100644 index 00000000..80ae9298 --- /dev/null +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb @@ -0,0 +1,60 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate the spatial mean of gridded data over an area/bounding box\n", + "\n", + "This notebook demonstrates how to compute the spatial mean of some sample 2m\n", + "temperature data for a given recetangular area bouding box.\n", + "This done via the `area` keyword argument, it is a dictionary with keys\n", + "`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from earthkit import data as ekd\n", + "from earthkit import transforms as ekt\n", + "from earthkit.transforms._tools import earthkit_remote_test_data_file\n", + "\n", + "remote_era5_file = earthkit_remote_test_data_file(\"era5-Europe-sfc-2m-temperature-3deg-2015-2017.grib\")\n", + "era5_data = ekd.from_source(\"url\", remote_era5_file)\n", + "ds = era5_data.to_xarray()\n", + "\n", + "# Define a bounding box covering central Europe\n", + "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", + "\n", + "# Calculate area mean using the bounding box\n", + "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", + "\n", + "area_mean_bbox" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rtd-dev-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/how-tos/spatial/index.rst b/docs/source/how-tos/spatial/index.rst index 8a9c5ef5..a48ccf32 100644 --- a/docs/source/how-tos/spatial/index.rst +++ b/docs/source/how-tos/spatial/index.rst @@ -11,4 +11,5 @@ subsets of data. :maxdepth: 1 howto_mask_era5_with_geometry.ipynb - howto_area_mean_era5_geometry.ipynb + howto_spatial_mean_era5_geometry.ipynb + howto_spatial_mean_era5_area.ipynb From 7827d0aabaaff68e68619c941d3e54cd6c9a8607 Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Fri, 15 May 2026 11:17:38 +0100 Subject: [PATCH 3/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/earthkit/transforms/spatial/_aggregate.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 5215f662..23ff0b7b 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -312,6 +312,11 @@ def _area_to_geodataframe(area: dict) -> gpd.GeoDataFrame: if missing: raise ValueError(f"area dictionary is missing required keys: {missing}") + if area["west"] > area["east"]: + raise ValueError( + "Areas that cross the anti-meridian (where 'west' > 'east') are not currently supported." + ) + polygon = box(area["west"], area["south"], area["east"], area["north"]) return gpd.GeoDataFrame(geometry=[polygon]) From 766c4dd86ad569a01c52a5c4db659e5aa7bc51ac Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Fri, 15 May 2026 11:20:04 +0100 Subject: [PATCH 4/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb index 80ae9298..ecd905a7 100644 --- a/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb @@ -7,8 +7,8 @@ "# Calculate the spatial mean of gridded data over an area/bounding box\n", "\n", "This notebook demonstrates how to compute the spatial mean of some sample 2m\n", - "temperature data for a given recetangular area bouding box.\n", - "This done via the `area` keyword argument, it is a dictionary with keys\n", + "temperature data for a given rectangular area bounding box.\n", + "This is done via the `area` keyword argument, it is a dictionary with keys\n", "`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." ] }, From 296a9c1edaab1d30daa3bf3200fb8e84fe4edf61 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 15 May 2026 11:41:22 +0100 Subject: [PATCH 5/9] correct NB names --- .../howto_spatial_mean_era5_area.ipynb | 24 ++++++++----------- .../howto_spatial_mean_era5_geometry.ipynb | 24 +++++++++++-------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb index 48c57dfc..ecd905a7 100644 --- a/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb @@ -4,9 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Calculate the spatial mean of gridded data over geometry data\n", + "# Calculate the spatial mean of gridded data over an area/bounding box\n", "\n", - "This notebook demonstrates how to compute the spatial mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" + "This notebook demonstrates how to compute the spatial mean of some sample 2m\n", + "temperature data for a given rectangular area bounding box.\n", + "This is done via the `area` keyword argument, it is a dictionary with keys\n", + "`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." ] }, { @@ -23,21 +26,14 @@ "era5_data = ekd.from_source(\"url\", remote_era5_file)\n", "ds = era5_data.to_xarray()\n", "\n", - "remote_geometry_url = earthkit_remote_test_data_file(\"NUTS_RG_60M_2021_4326_LEVL_0.geojson\")\n", - "geometry_data = ekd.from_source(\"url\", remote_geometry_url).to_geopandas()\n", + "# Define a bounding box covering central Europe\n", + "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", "\n", - "# Calculate area mean\n", - "area_mean = ekt.spatial.reduce(ds, geometry_data, how=\"mean\", weights=\"latitude\")\n", + "# Calculate area mean using the bounding box\n", + "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", "\n", - "area_mean" + "area_mean_bbox" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb index ecd905a7..48c57dfc 100644 --- a/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb +++ b/docs/source/how-tos/spatial/howto_spatial_mean_era5_geometry.ipynb @@ -4,12 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Calculate the spatial mean of gridded data over an area/bounding box\n", + "# Calculate the spatial mean of gridded data over geometry data\n", "\n", - "This notebook demonstrates how to compute the spatial mean of some sample 2m\n", - "temperature data for a given rectangular area bounding box.\n", - "This is done via the `area` keyword argument, it is a dictionary with keys\n", - "`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`." + "This notebook demonstrates how to compute the spatial mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n" ] }, { @@ -26,14 +23,21 @@ "era5_data = ekd.from_source(\"url\", remote_era5_file)\n", "ds = era5_data.to_xarray()\n", "\n", - "# Define a bounding box covering central Europe\n", - "area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n", + "remote_geometry_url = earthkit_remote_test_data_file(\"NUTS_RG_60M_2021_4326_LEVL_0.geojson\")\n", + "geometry_data = ekd.from_source(\"url\", remote_geometry_url).to_geopandas()\n", "\n", - "# Calculate area mean using the bounding box\n", - "area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n", + "# Calculate area mean\n", + "area_mean = ekt.spatial.reduce(ds, geometry_data, how=\"mean\", weights=\"latitude\")\n", "\n", - "area_mean_bbox" + "area_mean" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From d67992fe0f0b5dd84699192659941c9576b6bbe3 Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Fri, 15 May 2026 11:59:20 +0100 Subject: [PATCH 6/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- tests/test_30_spatial.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_30_spatial.py b/tests/test_30_spatial.py index 5cfdaa79..bf2200d5 100644 --- a/tests/test_30_spatial.py +++ b/tests/test_30_spatial.py @@ -993,8 +993,9 @@ def test_reduce_with_area_kwarg(): """spatial.reduce with area= returns correct result.""" # SAMPLE_ARRAY: lat=[0,60,90], lon=[0,30,60,90], values = rows of 1,2,3 # area covers lat=0 and lat=60 (mean of 1 and 2 = 1.5) - # Note: shapely contains_xy excludes points on the boundary of the polygon - area = {"north": 91, "south": -1, "east": 91, "west": -1} + # Note: shapely contains_xy excludes points on the boundary of the polygon, + # so use north/east=90 to exclude the lat=90/lon=90 boundary points. + area = {"north": 90, "south": -1, "east": 90, "west": -1} result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean") assert isinstance(result, xr.DataArray) np.testing.assert_allclose(result.item(), 1.5) From edacc1b4fc1e1fbe695baab8644a45f4782441c8 Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Fri, 15 May 2026 13:02:18 +0100 Subject: [PATCH 7/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/earthkit/transforms/spatial/_aggregate.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 23ff0b7b..2b466af2 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -338,16 +338,23 @@ def area_to_geodataframe_decorator(func): @functools.wraps(func) def wrapper(*args, area: dict | None = None, **kwargs): - geodataframe = kwargs.get("geodataframe") - # Also check if geodataframe was passed as a positional arg (2nd arg) - if len(args) >= 2: - geodataframe = args[1] + positional_geodataframe = args[1] if len(args) >= 2 else None + keyword_geodataframe = kwargs.get("geodataframe") - if area is not None and geodataframe is not None: + if area is not None and ( + positional_geodataframe is not None or keyword_geodataframe is not None + ): raise ValueError("Only one of 'area' or 'geodataframe' may be provided, not both.") if area is not None: - kwargs["geodataframe"] = _area_to_geodataframe(area) + area_geodataframe = _area_to_geodataframe(area) + if len(args) >= 2: + args = list(args) + args[1] = area_geodataframe + args = tuple(args) + kwargs.pop("geodataframe", None) + else: + kwargs["geodataframe"] = area_geodataframe return func(*args, **kwargs) From cb71489027cbb77c8f5027c89e961b1dbcc45445 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 15 May 2026 13:58:09 +0100 Subject: [PATCH 8/9] type check and dims order fix --- src/earthkit/transforms/spatial/_aggregate.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 2b466af2..29ced7ae 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -152,7 +152,7 @@ def mask_contains_points( ) # get spatial dims and create output array: - spatial_dims = list(set(lat_dims + lon_dims)) + spatial_dims = list(dict.fromkeys(lat_dims + lon_dims)) outdata_shape = tuple(len(coords[dim]) for dim in spatial_dims) outdata = xp.full(outdata_shape, xp.nan) # loop over shapes and mask any point that is in the shape @@ -349,9 +349,7 @@ def wrapper(*args, area: dict | None = None, **kwargs): if area is not None: area_geodataframe = _area_to_geodataframe(area) if len(args) >= 2: - args = list(args) - args[1] = area_geodataframe - args = tuple(args) + args = (args[0], area_geodataframe, *args[2:]) kwargs.pop("geodataframe", None) else: kwargs["geodataframe"] = area_geodataframe From 117d688258075259cea5883992f09d77386aabb2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 14:26:09 +0000 Subject: [PATCH 9/9] Expose area parameter in spatial mask/reduce signatures Agent-Logs-Url: https://github.com/ecmwf/earthkit-transforms/sessions/c3caafdf-dc7c-4612-b4c9-5380d8335e4c Co-authored-by: EddyCMWF <53045993+EddyCMWF@users.noreply.github.com> --- src/earthkit/transforms/spatial/_aggregate.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/earthkit/transforms/spatial/_aggregate.py b/src/earthkit/transforms/spatial/_aggregate.py index 29ced7ae..e9e53384 100644 --- a/src/earthkit/transforms/spatial/_aggregate.py +++ b/src/earthkit/transforms/spatial/_aggregate.py @@ -313,9 +313,7 @@ def _area_to_geodataframe(area: dict) -> gpd.GeoDataFrame: raise ValueError(f"area dictionary is missing required keys: {missing}") if area["west"] > area["east"]: - raise ValueError( - "Areas that cross the anti-meridian (where 'west' > 'east') are not currently supported." - ) + raise ValueError("Areas that cross the anti-meridian (where 'west' > 'east') are not currently supported.") polygon = box(area["west"], area["south"], area["east"], area["north"]) return gpd.GeoDataFrame(geometry=[polygon]) @@ -341,9 +339,7 @@ def wrapper(*args, area: dict | None = None, **kwargs): positional_geodataframe = args[1] if len(args) >= 2 else None keyword_geodataframe = kwargs.get("geodataframe") - if area is not None and ( - positional_geodataframe is not None or keyword_geodataframe is not None - ): + if area is not None and (positional_geodataframe is not None or keyword_geodataframe is not None): raise ValueError("Only one of 'area' or 'geodataframe' may be provided, not both.") if area is not None: @@ -369,6 +365,7 @@ def mask( lon_key: str | None = None, chunk: bool = True, union_geometries: bool = False, + area: dict | None = None, **mask_kwargs, ) -> xr.Dataset | xr.DataArray: """Apply multiple shape masks to some gridded data. @@ -459,6 +456,7 @@ def reduce( dataarray: xr.Dataset | xr.DataArray, geodataframe: gpd.GeoDataFrame | None = None, mask_arrays: xr.DataArray | list[xr.DataArray] | None = None, + area: dict | None = None, **kwargs, ) -> xr.Dataset | xr.DataArray: """Apply a shape object to an xarray.DataArray object using the specified 'how' method.