Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions docs/source/concepts/spatial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,28 @@ parameter can be used to specify the aggregation method. The default is `mean`.
:no-index:


Using a bounding box instead of a geometry
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Both :func:`mask` and :func:`reduce` accept an ``area`` keyword argument as an
alternative to providing a ``geodataframe``. ``area`` is a dictionary with keys
``"north"``, ``"south"``, ``"east"`` and ``"west"`` that defines a simple
bounding box:

.. code-block:: python

ekt.spatial.reduce(
ds, area={"north": 60, "south": 30, "east": 40, "west": -10}, how="mean"
)

The bounding box is converted internally to a single-polygon GeoDataFrame.
Providing both ``area`` and ``geodataframe`` raises a ``ValueError``.

.. note::
Areas that cross the anti-meridian (where ``west > east``) are not currently
supported.


In addition to the above functions, the spatial module also includes several methods
for computing the intermedieate steps of the aggregation process. These methods are
documented in the API reference guide: :doc:`../autodocs/earthkit.transforms.spatial`
60 changes: 60 additions & 0 deletions docs/source/how-tos/spatial/howto_spatial_mean_era5_area.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculate the spatial mean of gridded data over an area/bounding box\n",
"\n",
"This notebook demonstrates how to compute the spatial mean of some sample 2m\n",
"temperature data for a given rectangular area bounding box.\n",
"This is done via the `area` keyword argument, it is a dictionary with keys\n",
"`\"north\"`, `\"south\"`, `\"east\"`, and `\"west\"`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from earthkit import data as ekd\n",
"from earthkit import transforms as ekt\n",
"from earthkit.transforms._tools import earthkit_remote_test_data_file\n",
"\n",
"remote_era5_file = earthkit_remote_test_data_file(\"era5-Europe-sfc-2m-temperature-3deg-2015-2017.grib\")\n",
"era5_data = ekd.from_source(\"url\", remote_era5_file)\n",
"ds = era5_data.to_xarray()\n",
"\n",
"# Define a bounding box covering central Europe\n",
"area = {\"north\": 55, \"south\": 45, \"east\": 20, \"west\": 5}\n",
"\n",
"# Calculate area mean using the bounding box\n",
"area_mean_bbox = ekt.spatial.reduce(ds, area=area, how=\"mean\", weights=\"latitude\")\n",
"\n",
"area_mean_bbox"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "rtd-dev-env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculate the Area Mean of Gridded Data over a Geometries\n",
"# Calculate the spatial mean of gridded data over geometry data\n",
"\n",
"This notebook demonstrates how to compute the area mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n"
"This notebook demonstrates how to compute the spatial mean of some sample 2m temperature in ERA5 data for a given geometry (such as an administrative region) using earthkit-data and earthkit-transforms. This is useful for generating regional climate statistics.\n"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion docs/source/how-tos/spatial/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ subsets of data.
:maxdepth: 1

howto_mask_era5_with_geometry.ipynb
howto_area_mean_era5_geometry.ipynb
howto_spatial_mean_era5_geometry.ipynb
howto_spatial_mean_era5_area.ipynb
Comment thread
EddyCMWF marked this conversation as resolved.
92 changes: 88 additions & 4 deletions src/earthkit/transforms/spatial/_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import logging
import typing as T
from copy import deepcopy
Expand Down Expand Up @@ -151,7 +152,7 @@ def mask_contains_points(
)

# get spatial dims and create output array:
spatial_dims = list(set(lat_dims + lon_dims))
spatial_dims = list(dict.fromkeys(lat_dims + lon_dims))
outdata_shape = tuple(len(coords[dim]) for dim in spatial_dims)
outdata = xp.full(outdata_shape, xp.nan)
# loop over shapes and mask any point that is in the shape
Expand Down Expand Up @@ -289,15 +290,82 @@ def get_mask_dim_index(
return mask_dim_index


def _area_to_geodataframe(area: dict) -> gpd.GeoDataFrame:
"""Convert an area dictionary to a GeoDataFrame with a bounding box polygon.

Parameters
----------
area : dict
Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"``
defining the bounding box.

Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing a single bounding-box polygon.

"""
from shapely.geometry import box

required_keys = {"north", "south", "east", "west"}
missing = required_keys - set(area)
if missing:
raise ValueError(f"area dictionary is missing required keys: {missing}")

Comment thread
EddyCMWF marked this conversation as resolved.
if area["west"] > area["east"]:
raise ValueError("Areas that cross the anti-meridian (where 'west' > 'east') are not currently supported.")

polygon = box(area["west"], area["south"], area["east"], area["north"])
return gpd.GeoDataFrame(geometry=[polygon])


def area_to_geodataframe_decorator(func):
"""Decorator that converts an ``area`` kwarg to a ``geodataframe`` kwarg.

If ``area`` is provided as a dictionary with keys
``{"north", "south", "east", "west"}``, it is converted to a
`geopandas.GeoDataFrame` with a single bounding-box polygon and passed
as the ``geodataframe`` argument.

Raises ``ValueError`` if both ``area`` and ``geodataframe`` are provided.

.. note::
Areas that cross the anti-meridian (where ``west > east``) are not
currently supported.
"""

@functools.wraps(func)
def wrapper(*args, area: dict | None = None, **kwargs):
positional_geodataframe = args[1] if len(args) >= 2 else None
keyword_geodataframe = kwargs.get("geodataframe")

if area is not None and (positional_geodataframe is not None or keyword_geodataframe is not None):
raise ValueError("Only one of 'area' or 'geodataframe' may be provided, not both.")
Comment thread
EddyCMWF marked this conversation as resolved.

if area is not None:
area_geodataframe = _area_to_geodataframe(area)
if len(args) >= 2:
args = (args[0], area_geodataframe, *args[2:])
kwargs.pop("geodataframe", None)
else:
kwargs["geodataframe"] = area_geodataframe

return func(*args, **kwargs)
Comment thread
EddyCMWF marked this conversation as resolved.

return wrapper


@format_handler()
@area_to_geodataframe_decorator
def mask(
dataarray: xr.Dataset | xr.DataArray,
geodataframe: gpd.geodataframe.GeoDataFrame,
geodataframe: gpd.geodataframe.GeoDataFrame | None = None,
mask_dim: str | None = None,
lat_key: str | None = None,
lon_key: str | None = None,
chunk: bool = True,
union_geometries: bool = False,
area: dict | None = None,
**mask_kwargs,
) -> xr.Dataset | xr.DataArray:
"""Apply multiple shape masks to some gridded data.
Expand All @@ -313,7 +381,13 @@ def mask(
dataarray :
Xarray data object (must have geospatial coordinates).
geodataframe :
Geopandas Dataframe containing the polygons for aggregations
Geopandas Dataframe containing the polygons for aggregations.
Either ``geodataframe`` or ``area`` must be provided, but not both.
area : dict, optional
Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"``
defining a bounding box. Converted to a single-polygon GeoDataFrame
internally. Areas that cross the anti-meridian (``west > east``) are
not currently supported.
mask_dim :
dimension that will be created to accommodate the masked arrays, default is the index
of the geodataframe
Expand Down Expand Up @@ -342,6 +416,8 @@ def mask(
Each slice of layer corresponds to a feature in layer.

"""
if geodataframe is None:
raise ValueError("Either 'geodataframe' or 'area' must be provided.")
spatial_info = get_spatial_info(dataarray, lat_key=lat_key, lon_key=lon_key)
# Get spatial info required by mask functions:
mask_kwargs = {**mask_kwargs, **{key: spatial_info[key] for key in ["lat_key", "lon_key", "regular"]}}
Expand Down Expand Up @@ -375,10 +451,12 @@ def mask(


@format_handler()
@area_to_geodataframe_decorator
def reduce(
dataarray: xr.Dataset | xr.DataArray,
geodataframe: gpd.GeoDataFrame | None = None,
mask_arrays: xr.DataArray | list[xr.DataArray] | None = None,
area: dict | None = None,
**kwargs,
) -> xr.Dataset | xr.DataArray:
"""Apply a shape object to an xarray.DataArray object using the specified 'how' method.
Expand All @@ -390,7 +468,13 @@ def reduce(
dataarray :
Xarray data object (must have geospatial coordinates).
geodataframe :
Geopandas Dataframe containing the polygons for aggregations
Geopandas Dataframe containing the polygons for aggregations.
Cannot be provided together with ``area``.
area : dict, optional
Dictionary with keys ``"north"``, ``"south"``, ``"east"``, ``"west"``
defining a bounding box. Converted to a single-polygon GeoDataFrame
internally. Areas that cross the anti-meridian (``west > east``) are
not currently supported.
mask_arrays :
precomputed mask array[s], if provided this will be used instead of creating a new mask.
They must be on the same spatial grid as the dataarray.
Expand Down
103 changes: 103 additions & 0 deletions tests/test_30_spatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,3 +965,106 @@ def test_spatial_reduce_precomputed_mask_irr_list_two_masks(convention):
result = spatial.reduce(_MCP_DA, mask_arrays=[mask_a, mask_b], how="mean", lat_key="latitude", lon_key="longitude")
np.testing.assert_allclose(result.isel(index=0).item(), 3.0)
np.testing.assert_allclose(result.isel(index=1).item(), 1.0)


# ---------------------------------------------------------------------------
# area kwarg tests — no network required
# ---------------------------------------------------------------------------

_AREA_FULL = {"north": 90, "south": 0, "east": 90, "west": 0}


def test_area_to_geodataframe_creates_geodataframe():
"""_area_to_geodataframe returns a GeoDataFrame with a single polygon."""
gdf = _spatial._area_to_geodataframe(_AREA_FULL)
assert isinstance(gdf, gpd.GeoDataFrame)
assert len(gdf) == 1
bounds = gdf.geometry[0].bounds # (minx, miny, maxx, maxy)
assert bounds == (0, 0, 90, 90)


def test_area_to_geodataframe_missing_keys():
"""_area_to_geodataframe raises ValueError if keys are missing."""
with pytest.raises(ValueError, match="missing required keys"):
_spatial._area_to_geodataframe({"north": 90, "south": 0})


def test_reduce_with_area_kwarg():
"""spatial.reduce with area= returns correct result."""
# SAMPLE_ARRAY: lat=[0,60,90], lon=[0,30,60,90], values = rows of 1,2,3
# area covers lat=0 and lat=60 (mean of 1 and 2 = 1.5)
# Note: shapely contains_xy excludes points on the boundary of the polygon,
# so use north/east=90 to exclude the lat=90/lon=90 boundary points.
area = {"north": 90, "south": -1, "east": 90, "west": -1}
result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean")
assert isinstance(result, xr.DataArray)
np.testing.assert_allclose(result.item(), 1.5)
Comment thread
EddyCMWF marked this conversation as resolved.

Comment thread
EddyCMWF marked this conversation as resolved.

def test_reduce_with_area_subset():
"""spatial.reduce with area= covering a subset returns correct mean."""
# Only lat=60 (row with value 2), all longitudes
# shapely.contains_xy is strict (boundary excluded), so use 59-61 range
area = {"north": 61, "south": 59, "east": 91, "west": -1}
result = spatial.reduce(SAMPLE_ARRAY, area=area, how="mean")
np.testing.assert_allclose(result.item(), 2.0)


def test_mask_with_area_kwarg():
"""spatial.mask with area= returns masked data."""
area = {"north": 91, "south": 59, "east": 91, "west": -1}
result = spatial.mask(SAMPLE_ARRAY, area=area, chunk=False)
assert isinstance(result, xr.DataArray)
# lat=0 row should be all NaN (outside area)
assert np.all(np.isnan(result.sel(latitude=0).values))
# lat=60 row should be preserved
assert np.all(result.sel(latitude=60).values == 2)


def test_area_and_geodataframe_raises_reduce():
"""spatial.reduce raises ValueError if both area and geodataframe are provided."""
gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])])
with pytest.raises(ValueError, match="Only one of"):
spatial.reduce(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL, how="mean")


def test_area_and_geodataframe_raises_mask():
"""spatial.mask raises ValueError if both area and geodataframe are provided."""
gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])])
with pytest.raises(ValueError, match="Only one of"):
spatial.mask(SAMPLE_ARRAY, geodataframe=gdf, area=_AREA_FULL)


def test_area_and_positional_geodataframe_raises_reduce():
"""spatial.reduce raises ValueError if area and positional geodataframe are both given."""
gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])])
with pytest.raises(ValueError, match="Only one of"):
spatial.reduce(SAMPLE_ARRAY, gdf, area=_AREA_FULL, how="mean")


def test_area_and_positional_geodataframe_raises_mask():
"""spatial.mask raises ValueError if area and positional geodataframe are both given."""
gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])])
with pytest.raises(ValueError, match="Only one of"):
spatial.mask(SAMPLE_ARRAY, gdf, area=_AREA_FULL)


def test_mask_no_geodataframe_no_area_raises():
"""spatial.mask raises ValueError if neither geodataframe nor area is provided."""
with pytest.raises(ValueError, match="Either"):
spatial.mask(SAMPLE_ARRAY)


def test_area_none_passthrough_reduce():
"""spatial.reduce with area=None (default) behaves normally."""
result = spatial.reduce(SAMPLE_ARRAY, how="mean")
np.testing.assert_allclose(result.item(), 2.0)


def test_reduce_dataset_with_area():
"""spatial.reduce on a Dataset with area= returns a Dataset."""
ds = xr.Dataset({"var": SAMPLE_ARRAY})
area = {"north": 91, "south": -1, "east": 91, "west": -1}
result = spatial.reduce(ds, area=area, how="mean")
assert isinstance(result, xr.Dataset)
assert "var" in result
Loading