From ed6bb13fa410f53f34c0083cbdaa499251f38c23 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 17 Apr 2026 15:14:46 +0200 Subject: [PATCH 1/7] Auxiliary coordinates feature --- docs/source/how-tos/xr_engine/index.rst | 1 + .../xr_engine/xarray_engine_aux_coords.ipynb | 4623 +++++++++++++++++ src/earthkit/data/indexing/xarray.py | 7 +- src/earthkit/data/xr_engine/builder.py | 62 + src/earthkit/data/xr_engine/engine.py | 8 +- src/earthkit/data/xr_engine/profile.py | 18 +- 6 files changed, 4714 insertions(+), 5 deletions(-) create mode 100644 docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb diff --git a/docs/source/how-tos/xr_engine/index.rst b/docs/source/how-tos/xr_engine/index.rst index 169f4f773..6a5311bd2 100644 --- a/docs/source/how-tos/xr_engine/index.rst +++ b/docs/source/how-tos/xr_engine/index.rst @@ -23,6 +23,7 @@ Xarray engine xarray_engine_dims_as_attrs.ipynb xarray_engine_extra_dims.ipynb xarray_engine_remapping.ipynb + xarray_engine_aux_coords.ipynb xarray_engine_holes.ipynb xarray_engine_chunks.ipynb xarray_engine_chunks_on_dask_cluster.ipynb diff --git a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb new file mode 100644 index 000000000..a14f9278b --- /dev/null +++ b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb @@ -0,0 +1,4623 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2feafcc-430b-4718-983f-554e55dcd54a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: auxiliary coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "f557ebaf-0c93-4eab-8523-8e38b931c054", + 
"metadata": {}, + "source": [ + "### Basic examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fc6598cb-5516-424d-ba01-a8ee22a120cc", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2024-06-03 00:00:002024-06-030 days 00:00:00700pressure0regular_ll
1r2024-06-03 00:00:002024-06-030 days 00:00:00700pressure0regular_ll
2t2024-06-03 00:00:002024-06-030 days 00:00:00500pressure0regular_ll
3r2024-06-03 00:00:002024-06-030 days 00:00:00500pressure0regular_ll
4t2024-06-03 06:00:002024-06-030 days 06:00:00700pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "1 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "2 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "3 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "4 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 700 pressure 0 regular_ll \n", + "1 700 pressure 0 regular_ll \n", + "2 500 pressure 0 regular_ll \n", + "3 500 pressure 0 regular_ll \n", + "4 700 pressure 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl = ekd.from_source(\"sample\", \"pl.grib\").to_fieldlist()\n", + "ds_fl.ls().head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8856dcff-31ec-4c39-8725-a6f5e37e1065", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "    expver                   (forecast_reference_time) <U4 64B '0001' ... '0001'\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " expver (forecast_reference_time) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " centre_and_expver (forecast_reference_time, step) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (variable: 2, forecast_reference_time: 4, step: 2,\n",
+       "                              level: 2, latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * variable                 (variable) <U1 8B 'r' 't'\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    data                     (variable, forecast_reference_time, step, level, latitude, longitude) float64 175kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (variable: 2, forecast_reference_time: 4, step: 2,\n", + " level: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * variable (variable) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
02tp2025120900-168pd1:313
12tp2025120900-168pd1:515
22tp2025120900-168pd1:10110
32tp2025120900-168pd2:323
42tp2025120900-168pd2:525
52tp2025120900-168pd2:10210
62tp2025120900-168pd3:333
72tp2025120900-168pd3:535
82tp2025120900-168pd3:10310
92tp2025120900-168pd4:545
102tp2025120900-168pd4:10410
112tp2025120900-168pd5:555
122tp2025120900-168pd5:10510
132tp2025120900-168pd6:10610
142tp2025120900-168pd7:10710
152tp2025120900-168pd8:10810
162tp2025120900-168pd9:10910
172tp2025120900-168pd10:101010
\n", + "" + ], + "text/plain": [ + " metadata.shortName metadata.dataDate metadata.dataTime \\\n", + "0 2tp 20251209 0 \n", + "1 2tp 20251209 0 \n", + "2 2tp 20251209 0 \n", + "3 2tp 20251209 0 \n", + "4 2tp 20251209 0 \n", + "5 2tp 20251209 0 \n", + "6 2tp 20251209 0 \n", + "7 2tp 20251209 0 \n", + "8 2tp 20251209 0 \n", + "9 2tp 20251209 0 \n", + "10 2tp 20251209 0 \n", + "11 2tp 20251209 0 \n", + "12 2tp 20251209 0 \n", + "13 2tp 20251209 0 \n", + "14 2tp 20251209 0 \n", + "15 2tp 20251209 0 \n", + "16 2tp 20251209 0 \n", + "17 2tp 20251209 0 \n", + "\n", + " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", + "0 0-168 pd 1:3 1 \n", + "1 0-168 pd 1:5 1 \n", + "2 0-168 pd 1:10 1 \n", + "3 0-168 pd 2:3 2 \n", + "4 0-168 pd 2:5 2 \n", + "5 0-168 pd 2:10 2 \n", + "6 0-168 pd 3:3 3 \n", + "7 0-168 pd 3:5 3 \n", + "8 0-168 pd 3:10 3 \n", + "9 0-168 pd 4:5 4 \n", + "10 0-168 pd 4:10 4 \n", + "11 0-168 pd 5:5 5 \n", + "12 0-168 pd 5:10 5 \n", + "13 0-168 pd 6:10 6 \n", + "14 0-168 pd 7:10 7 \n", + "15 0-168 pd 8:10 8 \n", + "16 0-168 pd 9:10 9 \n", + "17 0-168 pd 10:10 10 \n", + "\n", + " metadata.numberOfForecastsInEnsemble \n", + "0 3 \n", + "1 5 \n", + "2 10 \n", + "3 3 \n", + "4 5 \n", + "5 10 \n", + "6 3 \n", + "7 5 \n", + "8 10 \n", + "9 5 \n", + "10 10 \n", + "11 5 \n", + "12 10 \n", + "13 10 \n", + "14 10 \n", + "15 10 \n", + "16 10 \n", + "17 10 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl2.ls(\n", + " keys=[\n", + " \"metadata.shortName\",\n", + " \"metadata.dataDate\",\n", + " \"metadata.dataTime\",\n", + " \"metadata.stepRange\",\n", + " \"metadata.dataType\",\n", + " \"metadata.quantile\",\n", + " \"metadata.number\",\n", + " \"metadata.numberOfForecastsInEnsemble\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", + "metadata": {}, + "source": [ + "Note that, in this context, the usual meaning of 
the metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", + "\n", + "For this reason, we must:\n", + "- declare ``\"quantile\"`` as an extra dimension, and\n", + "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", + "\n", + "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` as auxiliary coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 13kB\n",
+       "Dimensions:                  (quantile: 18, forecast_reference_time: 1,\n",
+       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
+       "                              longitude: 12)\n",
+       "Coordinates:\n",
+       "  * quantile                 (quantile) <U5 360B '10:10' '1:10' ... '9:10'\n",
+       "    quantile_rank            (quantile) <U2 144B '10' '1' '1' ... '7' '8' '9'\n",
+       "    nquantiles               (quantile) int64 144B 10 10 3 5 10 ... 10 10 10 10\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 8B 2025...\n",
+       "  * step                     (step) timedelta64[ns] 8B 7 days\n",
+       "  * level                    (level) int64 8B 0\n",
+       "  * level_type               (level_type) <U7 28B 'surface'\n",
+       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
+       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
+       "Data variables:\n",
+       "    2tp                      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 13kB\n", + "Dimensions: (quantile: 18, forecast_reference_time: 1,\n", + " step: 1, level: 1, level_type: 1, latitude: 7,\n", + " longitude: 12)\n", + "Coordinates:\n", + " * quantile (quantile) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0avg_2t2010-07-012010-06-0130 days2height_above_ground_level0regular_ll
1avg_2t2010-08-012010-07-0131 days2height_above_ground_level0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 avg_2t 2010-07-01 2010-06-01 30 days \n", + "1 avg_2t 2010-08-01 2010-07-01 31 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member \\\n", + "0 2 height_above_ground_level 0 \n", + "1 2 height_above_ground_level 0 \n", + "\n", + " geography.grid_type \n", + "0 regular_ll \n", + "1 regular_ll " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl3 = ekd.from_source(\"file\", \"/Users/ecm8620/data/issue-948-avg_2t-2months.grib2\").to_fieldlist()\n", + "ds_fl3.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fdcac0e0-f501-4cd5-bf2b-59c9d760d8e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 22kB\n",
+       "Dimensions:                  (forecast_reference_time: 2, latitude: 33,\n",
+       "                              longitude: 41)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 201...\n",
+       "    step                     (forecast_reference_time) timedelta64[ns] 16B 30...\n",
+       "  * latitude                 (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n",
+       "  * longitude                (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n",
+       "Data variables:\n",
+       "    avg_2t                   (forecast_reference_time, latitude, longitude) float64 22kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 22kB\n", + "Dimensions: (forecast_reference_time: 2, latitude: 33,\n", + " longitude: 41)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 201...\n", + " step (forecast_reference_time) timedelta64[ns] 16B 30...\n", + " * latitude (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n", + " * longitude (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n", + "Data variables:\n", + " avg_2t (forecast_reference_time, latitude, longitude) float64 22kB ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds5 = ds_fl3.to_xarray(\n", + " drop_dims=\"step\",\n", + " aux_coords={\"step\": (\"time.step\", (\"forecast_reference_time\",))},\n", + ")\n", + "ds5.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "aebe01e5-0a1f-4871-82d8-9baa2fb4272d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 22kB\n",
+       "Dimensions:     (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n",
+       "Coordinates:\n",
+       "  * s           (s) <U4 16B '0001'\n",
+       "  * valid_time  (valid_time) datetime64[ns] 16B 2010-07-01 2010-08-01\n",
+       "  * latitude    (latitude) float64 264B 55.0 54.75 54.5 ... 47.5 47.25 47.0\n",
+       "  * longitude   (longitude) float64 328B 5.0 5.25 5.5 5.75 ... 14.5 14.75 15.0\n",
+       "Data variables:\n",
+       "    avg_2t      (s, valid_time, latitude, longitude) float64 22kB 284.2 ... 2...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 22kB\n", + "Dimensions: (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n", + "Coordinates:\n", + " * s (s) a Coord object + possibly the same for "valid_time" + + self.collect_aux_coords() + # build variable and global attributes xr_attrs = self.profile.attrs.builder.build(self.ds, var_builders, rename=True) xr_coords = self.coords() diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index dbd788c8e..009170199 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -39,6 +39,7 @@ def open_dataset( add_valid_time_coord=None, decode_times=None, decode_timedelta=None, + aux_coords=None, add_geo_coords=None, attrs_mode=None, attrs=None, @@ -256,6 +257,9 @@ def open_dataset( will have the attribute "units" appropriately set (to "minutes", "hours", etc.). If None (default), assume the same value of ``decode_times`` unless the ``profile`` overwrites it. + aux_coords: dict, None + Mapping from an auxiliary coordinate label metadata keys to a tuple: + (metadata key, the dataset dimension(s)). The default value is None. add_geo_coords: bool, None If True, add geographic coordinates to the dataset when field values are represented by a single "values" dimension. Its default value (None) expands @@ -312,8 +316,8 @@ def open_dataset( Define fill values to metadata keys. Default is None. remapping: dict, None Define new metadata keys for indexing. Any key provided in ``remapping`` may be referenced - when specifying options such as ``variable_key``, ``extra_dims``, ``ensure_dims``, and others. - Default is None. + when specifying options such as ``variable_key``, ``extra_dims``, ``ensure_dims``, ``aux_coords`` + and others. Default is None. lazy_load: bool, None If True, the resulting Dataset will load data lazily from the underlying data source. 
If False, a DataSet holding all the data in memory diff --git a/src/earthkit/data/xr_engine/profile.py b/src/earthkit/data/xr_engine/profile.py index 459e08523..93738a190 100644 --- a/src/earthkit/data/xr_engine/profile.py +++ b/src/earthkit/data/xr_engine/profile.py @@ -40,6 +40,21 @@ def add(self, remapping, patch=None): self.patch.update(patch) +class AuxCoords(dict): + def __init__(self, aux_coords): + super().__init__() + for coord_label, key_dims in ensure_dict(aux_coords).items(): + try: + key, dims = key_dims + dims = ensure_iterable(dims) + except Exception: + raise ValueError( + f"Auxiliary coordinate {coord_label} has invalid specification: got {key_dims} " + f"while a tuple (, ) is expected" + ) + self[coord_label] = (key, dims) + + class ProfileConf: def __init__(self): self._conf = {} @@ -184,7 +199,7 @@ def check(self, profile): class Profile: - USER_ONLY_OPTIONS = ["remapping", "patch", "fill_metadata"] + USER_ONLY_OPTIONS = ["remapping", "patch", "fill_metadata", "aux_coords"] DEFAULT_PROFILE_NAME = "earthkit" def __init__( @@ -213,6 +228,7 @@ def __init__( patch[k] = v self.remapping = RemappingBuilder(kwargs.pop("remapping", None), patch) + self.aux_coords = AuxCoords(kwargs.pop("aux_coords", None)) # variables mono_variable = kwargs.pop("mono_variable") From bd10f88dbaee3534faacd545ad511669402e176a Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 21 Apr 2026 23:43:40 +0200 Subject: [PATCH 2/7] ListDiff._compare improved and now provides a sensible error when data do not form a hypercube Tests added --- src/earthkit/data/xr_engine/diff.py | 17 +- tests/xr_engine/test_xr_engine_aux_coords.py | 177 +++++++++++++++++++ 2 files changed, 179 insertions(+), 15 deletions(-) create mode 100644 tests/xr_engine/test_xr_engine_aux_coords.py diff --git a/src/earthkit/data/xr_engine/diff.py b/src/earthkit/data/xr_engine/diff.py index 7ca07cf97..f6436b879 100644 --- a/src/earthkit/data/xr_engine/diff.py +++ b/src/earthkit/data/xr_engine/diff.py @@ -7,7 
+7,6 @@ # nor does it submit to any jurisdiction. # -import datetime import logging import math @@ -79,26 +78,14 @@ class ListDiff: @staticmethod def _compare(v1, v2): - if isinstance(v1, int) and isinstance(v2, int): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, float) and isinstance(v2, float): + if isinstance(v1, float) and isinstance(v2, float): return math.isclose(v1, v2, rel_tol=1e-9), ListDiff.VALUE_DIFF - elif isinstance(v1, str) and isinstance(v2, str): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.datetime) and isinstance(v2, datetime.datetime): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.date) and isinstance(v2, datetime.date): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.time) and isinstance(v2, datetime.time): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.timedelta) and isinstance(v2, datetime.timedelta): - return v1 == v2, ListDiff.VALUE_DIFF elif v1 is None and v2 is None: return True, ListDiff.VALUE_DIFF elif type(v1) is not type(v2): return False, ListDiff.TYPE_DIFF else: - raise ValueError(f"Unsupported type: {type(v1)}") + return v1 == v2, ListDiff.VALUE_DIFF @staticmethod def diff(vals1, vals2, name=str()): diff --git a/tests/xr_engine/test_xr_engine_aux_coords.py b/tests/xr_engine/test_xr_engine_aux_coords.py new file mode 100644 index 000000000..ffcaa7220 --- /dev/null +++ b/tests/xr_engine/test_xr_engine_aux_coords.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import numpy as np +import pytest + +from earthkit.data import from_source +from earthkit.data.utils.testing import earthkit_remote_test_data_file + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_simple(lazy_load): + """aux_coords with a single metadata key mapped to a single dimension.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + aux_coords={"centre": ("metadata.centre", "forecast_reference_time")}, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "centre" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time",) + assert (ds["centre"] == "ecmf").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_multi_dim(lazy_load): + """aux_coords mapped to multiple dimensions.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + aux_coords={"centre": ("metadata.centre", ("forecast_reference_time", "step"))}, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "centre" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time", "step") + assert (ds["centre"] == "ecmf").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_with_remapping(lazy_load): + """aux_coords using a remapped key.""" + ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = ds0.to_xarray( + remapping={"centre_class": "{metadata.centre}_{metadata.class}"}, + aux_coords={"centre_class": ("centre_class", ("forecast_reference_time", "step"))}, + lazy_load=lazy_load, + ) + + assert "centre_class" in ds.coords + assert "centre_class" not in ds.sizes + assert ds["centre_class"].dims == ("forecast_reference_time", "step") + assert 
(ds["centre_class"] == "ecmf_od").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_multiple_coords(lazy_load): + """Multiple aux_coords specified at once.""" + ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = ds0.to_xarray( + profile="mars", + aux_coords={ + "centre": ("metadata.centre", "forecast_reference_time"), + "class_coord": ("metadata.class", "forecast_reference_time"), + }, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "class_coord" in ds.coords + assert "centre" not in ds.sizes + assert "class_coord" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time",) + assert ds["class_coord"].dims == ("forecast_reference_time",) + assert (ds["centre"] == "ecmf").all() + assert (ds["class_coord"] == "od").all() + + +@pytest.mark.cache +def test_xr_engine_aux_coords_unknown_dim(): + """aux_coords referencing a non-existent dimension should raise.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + with pytest.raises(AssertionError, match="unknown dimension"): + fl.to_xarray(aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}) + + +def test_xr_engine_aux_coords_invalid_spec(): + """aux_coords with invalid tuple specification should raise ValueError.""" + from earthkit.data.xr_engine.profile import AuxCoords + + with pytest.raises(ValueError, match="invalid specification"): + AuxCoords({"bad": "not_a_tuple"}) + + +@pytest.mark.cache +def test_xr_engine_aux_coords_empty(): + """Empty aux_coords should produce no extra coordinates.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds_no_aux = fl.to_xarray(aux_coords={}) + ds_none = fl.to_xarray() + + assert set(ds_no_aux.coords) == set(ds_none.coords) + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, 
False]) +def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): + """Drop a dimension and re-add it as an auxiliary coordinate.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + + ds = fl.to_xarray( + time_dims="valid_time", + aux_coords={"step": ("time.step", ("valid_time",))}, + lazy_load=lazy_load, + ) + + # step should be a coordinate but not a dimension + assert "step" in ds.coords + assert "step" not in ds.sizes + assert "valid_time" in ds.coords["step"].dims + assert (ds.coords["step"] == np.array([0, 6] * 4, dtype="m8[h]")).all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_with_mono_variable(lazy_load): + """aux_coords combined with mono_variable mode.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + fixed_dims=["parameter.variable", "time.forecast_reference_time", "time.step", "vertical.level"], + mono_variable=True, + aux_coords={"metadata_paramId": ("metadata.paramId", "parameter.variable")}, + lazy_load=lazy_load, + ) + assert "metadata_paramId" in ds.coords + assert "metadata_paramId" not in ds.sizes + assert (ds["metadata_paramId"] == [157, 130]).all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): + """With strict=True, conflicting aux_coord values for same dim coords should raise.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/mixed_pl_ml_small.grib")).to_fieldlist() + + # levtype varies across levels, so mapping it to forecast_reference_time alone + # (which doesn't uniquely determine levtype) should conflict with strict=True + with pytest.raises(AssertionError, match="Conflicting values"): + _ = fl.to_xarray( + strict=True, + level_dim_mode="level_and_type", + aux_coords={"levtype": ("metadata.levtype", 
"forecast_reference_time")}, + lazy_load=lazy_load, + ) + + ds = fl.to_xarray( + strict=True, + level_dim_mode="level_and_type", + aux_coords={"levtype": ("metadata.levtype", "level_and_type")}, + lazy_load=lazy_load, + ) + assert "levtype" in ds.coords + assert "levtype" not in ds.sizes + assert (ds["levtype"] == ["ml", "pl", "pl", "ml"]).all() From 9d9f4a6e64e50ee198abbb9c2bf34a5e1d3b2aab Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 21 Apr 2026 23:56:41 +0200 Subject: [PATCH 3/7] Tests additionally parametrised with allow_holes=False/True --- tests/xr_engine/test_xr_engine_aux_coords.py | 46 ++++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/tests/xr_engine/test_xr_engine_aux_coords.py b/tests/xr_engine/test_xr_engine_aux_coords.py index ffcaa7220..1e05d4c69 100644 --- a/tests/xr_engine/test_xr_engine_aux_coords.py +++ b/tests/xr_engine/test_xr_engine_aux_coords.py @@ -17,13 +17,15 @@ @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_simple(lazy_load): +def test_xr_engine_aux_coords_simple(lazy_load, allow_holes): """aux_coords with a single metadata key mapped to a single dimension.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = fl.to_xarray( aux_coords={"centre": ("metadata.centre", "forecast_reference_time")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -33,13 +35,15 @@ def test_xr_engine_aux_coords_simple(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_multi_dim(lazy_load): +def test_xr_engine_aux_coords_multi_dim(lazy_load, allow_holes): """aux_coords mapped to multiple dimensions.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() 
ds = fl.to_xarray( aux_coords={"centre": ("metadata.centre", ("forecast_reference_time", "step"))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -49,14 +53,16 @@ def test_xr_engine_aux_coords_multi_dim(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_with_remapping(lazy_load): +def test_xr_engine_aux_coords_with_remapping(lazy_load, allow_holes): """aux_coords using a remapped key.""" ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = ds0.to_xarray( remapping={"centre_class": "{metadata.centre}_{metadata.class}"}, aux_coords={"centre_class": ("centre_class", ("forecast_reference_time", "step"))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre_class" in ds.coords @@ -66,8 +72,9 @@ def test_xr_engine_aux_coords_with_remapping(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_multiple_coords(lazy_load): +def test_xr_engine_aux_coords_multiple_coords(lazy_load, allow_holes): """Multiple aux_coords specified at once.""" ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = ds0.to_xarray( @@ -77,6 +84,7 @@ def test_xr_engine_aux_coords_multiple_coords(lazy_load): "class_coord": ("metadata.class", "forecast_reference_time"), }, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -90,11 +98,15 @@ def test_xr_engine_aux_coords_multiple_coords(lazy_load): @pytest.mark.cache -def test_xr_engine_aux_coords_unknown_dim(): +@pytest.mark.parametrize("allow_holes", [False, True]) +def test_xr_engine_aux_coords_unknown_dim(allow_holes): """aux_coords referencing a non-existent dimension should raise.""" fl = from_source("url", 
earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() with pytest.raises(AssertionError, match="unknown dimension"): - fl.to_xarray(aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}) + fl.to_xarray( + aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}, + allow_holes=allow_holes, + ) def test_xr_engine_aux_coords_invalid_spec(): @@ -106,18 +118,20 @@ def test_xr_engine_aux_coords_invalid_spec(): @pytest.mark.cache -def test_xr_engine_aux_coords_empty(): +@pytest.mark.parametrize("allow_holes", [False, True]) +def test_xr_engine_aux_coords_empty(allow_holes): """Empty aux_coords should produce no extra coordinates.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() - ds_no_aux = fl.to_xarray(aux_coords={}) - ds_none = fl.to_xarray() + ds_no_aux = fl.to_xarray(aux_coords={}, allow_holes=allow_holes) + ds_none = fl.to_xarray(allow_holes=allow_holes) assert set(ds_no_aux.coords) == set(ds_none.coords) @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): +def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load, allow_holes): """Drop a dimension and re-add it as an auxiliary coordinate.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() @@ -125,6 +139,7 @@ def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): time_dims="valid_time", aux_coords={"step": ("time.step", ("valid_time",))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) # step should be a coordinate but not a dimension @@ -135,8 +150,9 @@ def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_with_mono_variable(lazy_load): +def 
test_xr_engine_aux_coords_with_mono_variable(lazy_load, allow_holes): """aux_coords combined with mono_variable mode.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = fl.to_xarray( @@ -144,6 +160,7 @@ def test_xr_engine_aux_coords_with_mono_variable(lazy_load): mono_variable=True, aux_coords={"metadata_paramId": ("metadata.paramId", "parameter.variable")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "metadata_paramId" in ds.coords assert "metadata_paramId" not in ds.sizes @@ -151,19 +168,19 @@ def test_xr_engine_aux_coords_with_mono_variable(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): +def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load, allow_holes): """With strict=True, conflicting aux_coord values for same dim coords should raise.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/mixed_pl_ml_small.grib")).to_fieldlist() - # levtype varies across levels, so mapping it to forecast_reference_time alone - # (which doesn't uniquely determine levtype) should conflict with strict=True with pytest.raises(AssertionError, match="Conflicting values"): _ = fl.to_xarray( strict=True, level_dim_mode="level_and_type", aux_coords={"levtype": ("metadata.levtype", "forecast_reference_time")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) ds = fl.to_xarray( @@ -171,6 +188,7 @@ def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): level_dim_mode="level_and_type", aux_coords={"levtype": ("metadata.levtype", "level_and_type")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "levtype" in ds.coords assert "levtype" not in ds.sizes From 0409add5aba31fba8784fb93e2c5260838dbdcb4 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Thu, 23 Apr 2026 11:40:28 +0200 Subject: [PATCH 4/7] How-to 
notebook cleaned Docstrings on `aux_coord` improved Error message on conflicting values for aux coords improved --- .../xr_engine/xarray_engine_aux_coords.ipynb | 3843 +++-------------- src/earthkit/data/indexing/xarray.py | 2 +- src/earthkit/data/xr_engine/builder.py | 11 +- src/earthkit/data/xr_engine/engine.py | 2 +- 4 files changed, 569 insertions(+), 3289 deletions(-) diff --git a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb index a14f9278b..8165ac2f9 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb @@ -33,23 +33,31 @@ }, { "cell_type": "markdown", - "id": "f557ebaf-0c93-4eab-8523-8e38b931c054", + "id": "f3117255-6cc1-4cf2-ba91-dc3134973b91", "metadata": {}, "source": [ - "### Basic examples" + "### Basic example" + ] + }, + { + "cell_type": "markdown", + "id": "e96e8da8-8219-4a79-92ad-515606816919", + "metadata": {}, + "source": [ + "First, we get some GRIB data containing control and perturbed forecasts." 
] }, { "cell_type": "code", "execution_count": 2, - "id": "fc6598cb-5516-424d-ba01-a8ee22a120cc", + "id": "a8f1d8b7-4a3b-4186-a827-17dbb16eaa2b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " " + " " ] }, { @@ -81,6 +89,7 @@ " vertical.level_type\n", " ensemble.member\n", " geography.grid_type\n", + " metadata.dataType\n", " \n", " \n", " \n", @@ -90,21 +99,23 @@ " 2024-06-03 00:00:00\n", " 2024-06-03\n", " 0 days 00:00:00\n", - " 700\n", + " 500\n", " pressure\n", " 0\n", " regular_ll\n", + " cf\n", " \n", " \n", " 1\n", - " r\n", - " 2024-06-03 00:00:00\n", + " t\n", + " 2024-06-03 06:00:00\n", " 2024-06-03\n", - " 0 days 00:00:00\n", - " 700\n", + " 0 days 06:00:00\n", + " 500\n", " pressure\n", " 0\n", " regular_ll\n", + " cf\n", " \n", " \n", " 2\n", @@ -114,19 +125,21 @@ " 0 days 00:00:00\n", " 500\n", " pressure\n", - " 0\n", + " 1\n", " regular_ll\n", + " pf\n", " \n", " \n", " 3\n", - " r\n", + " t\n", " 2024-06-03 00:00:00\n", " 2024-06-03\n", " 0 days 00:00:00\n", " 500\n", " pressure\n", - " 0\n", + " 2\n", " regular_ll\n", + " pf\n", " \n", " \n", " 4\n", @@ -134,10 +147,23 @@ " 2024-06-03 06:00:00\n", " 2024-06-03\n", " 0 days 06:00:00\n", - " 700\n", + " 500\n", " pressure\n", - " 0\n", + " 1\n", + " regular_ll\n", + " pf\n", + " \n", + " \n", + " 5\n", + " t\n", + " 2024-06-03 06:00:00\n", + " 2024-06-03\n", + " 0 days 06:00:00\n", + " 500\n", + " pressure\n", + " 2\n", " regular_ll\n", + " pf\n", " \n", " \n", "\n", @@ -146,17 +172,27 @@ "text/plain": [ " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", "0 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", - "1 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "1 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", "2 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", - "3 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "3 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", "4 t 2024-06-03 06:00:00 2024-06-03 
0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 700 pressure 0 regular_ll \n", - "1 700 pressure 0 regular_ll \n", - "2 500 pressure 0 regular_ll \n", - "3 500 pressure 0 regular_ll \n", - "4 700 pressure 0 regular_ll " + "5 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 500 pressure 0 regular_ll \n", + "1 500 pressure 0 regular_ll \n", + "2 500 pressure 1 regular_ll \n", + "3 500 pressure 2 regular_ll \n", + "4 500 pressure 1 regular_ll \n", + "5 500 pressure 2 regular_ll \n", + "\n", + " metadata.dataType \n", + "0 cf \n", + "1 cf \n", + "2 pf \n", + "3 pf \n", + "4 pf \n", + "5 pf " ] }, "execution_count": 2, @@ -165,8 +201,16 @@ } ], "source": [ - "ds_fl = ekd.from_source(\"sample\", \"pl.grib\").to_fieldlist()\n", - "ds_fl.ls().head()" + "ds_fl = ekd.from_source(\"sample\", \"ens_cf_pf.grib\").to_fieldlist()\n", + "ds_fl.ls(extra_keys=[\"metadata.dataType\"])" + ] + }, + { + "cell_type": "markdown", + "id": "db15e80f-4beb-441d-b334-9fc1a300d1af", + "metadata": {}, + "source": [ + "Using the Xarray engine keyword `aux_coords` one can declare an auxiliary coordinate `\"forecast_type\"` whose values are derived from the GRIB metadata key `\"dataType\"`and depend on a single dimension `\"member\"`." ] }, { @@ -720,122 +764,74 @@ " filter: drop-shadow(1px 1px 5px var(--xr-font-color2));\n", " stroke-width: 0.8px;\n", "}\n", - "
<xarray.Dataset> Size: 176kB\n",
-       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
-       "                              latitude: 19, longitude: 36)\n",
+       "
<xarray.Dataset> Size: 33kB\n",
+       "Dimensions:        (member: 3, step: 2, latitude: 19, longitude: 36)\n",
        "Coordinates:\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
-       "    expver                   (forecast_reference_time) <U4 64B '0001' ... '0001'\n",
-       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
-       "  * level                    (level) int64 16B 500 700\n",
-       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
-       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "  * member         (member) <U1 12B '0' '1' '2'\n",
+       "    forecast_type  (member) <U2 24B 'cf' 'pf' 'pf'\n",
+       "  * step           (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * latitude       (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
+       "  * longitude      (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
        "Data variables:\n",
-       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
-       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t              (member, step, latitude, longitude) float64 33kB 250.2 ......\n",
        "Attributes:\n",
        "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", - " latitude: 19, longitude: 36)\n", + " Size: 33kB\n", + "Dimensions: (member: 3, step: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", - " expver (forecast_reference_time) \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
    -       "                              latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    -       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
    -       "  * level                    (level) int64 16B 500 700\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", - " latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", - " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " centre_and_expver (forecast_reference_time, step) \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (variable: 2, forecast_reference_time: 4, step: 2,\n",
    -       "                              level: 2, latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * variable                 (variable) <U1 8B 'r' 't'\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    -       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
    -       "  * level                    (level) int64 16B 500 700\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    data                     (variable, forecast_reference_time, step, level, latitude, longitude) float64 175kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 176kB\n", - "Dimensions: (variable: 2, forecast_reference_time: 4, step: 2,\n", - " level: 2, latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * variable (variable) \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
    02tp2025120900-168pd1:313
    12tp2025120900-168pd1:515
    22tp2025120900-168pd1:10110
    32tp2025120900-168pd2:323
    42tp2025120900-168pd2:525
    52tp2025120900-168pd2:10210
    62tp2025120900-168pd3:333
    72tp2025120900-168pd3:535
    82tp2025120900-168pd3:10310
    92tp2025120900-168pd4:545
    102tp2025120900-168pd4:10410
    112tp2025120900-168pd5:555
    122tp2025120900-168pd5:10510
    132tp2025120900-168pd6:10610
    142tp2025120900-168pd7:10710
    152tp2025120900-168pd8:10810
    162tp2025120900-168pd9:10910
    172tp2025120900-168pd10:101010
    \n", - "" - ], - "text/plain": [ - " metadata.shortName metadata.dataDate metadata.dataTime \\\n", - "0 2tp 20251209 0 \n", - "1 2tp 20251209 0 \n", - "2 2tp 20251209 0 \n", - "3 2tp 20251209 0 \n", - "4 2tp 20251209 0 \n", - "5 2tp 20251209 0 \n", - "6 2tp 20251209 0 \n", - "7 2tp 20251209 0 \n", - "8 2tp 20251209 0 \n", - "9 2tp 20251209 0 \n", - "10 2tp 20251209 0 \n", - "11 2tp 20251209 0 \n", - "12 2tp 20251209 0 \n", - "13 2tp 20251209 0 \n", - "14 2tp 20251209 0 \n", - "15 2tp 20251209 0 \n", - "16 2tp 20251209 0 \n", - "17 2tp 20251209 0 \n", - "\n", - " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", - "0 0-168 pd 1:3 1 \n", - "1 0-168 pd 1:5 1 \n", - "2 0-168 pd 1:10 1 \n", - "3 0-168 pd 2:3 2 \n", - "4 0-168 pd 2:5 2 \n", - "5 0-168 pd 2:10 2 \n", - "6 0-168 pd 3:3 3 \n", - "7 0-168 pd 3:5 3 \n", - "8 0-168 pd 3:10 3 \n", - "9 0-168 pd 4:5 4 \n", - "10 0-168 pd 4:10 4 \n", - "11 0-168 pd 5:5 5 \n", - "12 0-168 pd 5:10 5 \n", - "13 0-168 pd 6:10 6 \n", - "14 0-168 pd 7:10 7 \n", - "15 0-168 pd 8:10 8 \n", - "16 0-168 pd 9:10 9 \n", - "17 0-168 pd 10:10 10 \n", - "\n", - " metadata.numberOfForecastsInEnsemble \n", - "0 3 \n", - "1 5 \n", - "2 10 \n", - "3 3 \n", - "4 5 \n", - "5 10 \n", - "6 3 \n", - "7 5 \n", - "8 10 \n", - "9 5 \n", - "10 10 \n", - "11 5 \n", - "12 10 \n", - "13 10 \n", - "14 10 \n", - "15 10 \n", - "16 10 \n", - "17 10 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl2.ls(\n", - " keys=[\n", - " \"metadata.shortName\",\n", - " \"metadata.dataDate\",\n", - " \"metadata.dataTime\",\n", - " \"metadata.stepRange\",\n", - " \"metadata.dataType\",\n", - " \"metadata.quantile\",\n", - " \"metadata.number\",\n", - " \"metadata.numberOfForecastsInEnsemble\",\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", - "metadata": {}, - "source": [ - "Note that, in this context, the usual meaning 
of the metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", - "\n", - "For this reason, we must:\n", - "- declare ``\"quantile\"`` as an extra dimension, and\n", - "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", - "\n", - "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` is auxiliary coordinates." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 13kB\n",
    -       "Dimensions:                  (quantile: 18, forecast_reference_time: 1,\n",
    -       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
    -       "                              longitude: 12)\n",
    -       "Coordinates:\n",
    -       "  * quantile                 (quantile) <U5 360B '10:10' '1:10' ... '9:10'\n",
    -       "    quantile_rank            (quantile) <U2 144B '10' '1' '1' ... '7' '8' '9'\n",
    -       "    nquantiles               (quantile) int64 144B 10 10 3 5 10 ... 10 10 10 10\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 8B 2025...\n",
    -       "  * step                     (step) timedelta64[ns] 8B 7 days\n",
    -       "  * level                    (level) int64 8B 0\n",
    -       "  * level_type               (level_type) <U7 28B 'surface'\n",
    -       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
    -       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
    -       "Data variables:\n",
    -       "    2tp                      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 13kB\n", - "Dimensions: (quantile: 18, forecast_reference_time: 1,\n", - " step: 1, level: 1, level_type: 1, latitude: 7,\n", - " longitude: 12)\n", - "Coordinates:\n", - " * quantile (quantile) \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
    0avg_2t2010-07-012010-06-0130 days2height_above_ground_level0regular_ll
    1avg_2t2010-08-012010-07-0131 days2height_above_ground_level0regular_ll
    \n", - "" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 avg_2t 2010-07-01 2010-06-01 30 days \n", - "1 avg_2t 2010-08-01 2010-07-01 31 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member \\\n", - "0 2 height_above_ground_level 0 \n", - "1 2 height_above_ground_level 0 \n", - "\n", - " geography.grid_type \n", - "0 regular_ll \n", - "1 regular_ll " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl3 = ekd.from_source(\"file\", \"/Users/ecm8620/data/issue-948-avg_2t-2months.grib2\").to_fieldlist()\n", - "ds_fl3.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "fdcac0e0-f501-4cd5-bf2b-59c9d760d8e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 22kB\n",
    -       "Dimensions:                  (forecast_reference_time: 2, latitude: 33,\n",
    -       "                              longitude: 41)\n",
    -       "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 201...\n",
    -       "    step                     (forecast_reference_time) timedelta64[ns] 16B 30...\n",
    -       "  * latitude                 (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n",
    -       "  * longitude                (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n",
    -       "Data variables:\n",
    -       "    avg_2t                   (forecast_reference_time, latitude, longitude) float64 22kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + "
    metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
    02tp2025120900-168pd1:313
    12tp2025120900-168pd1:515
    22tp2025120900-168pd1:10110
    32tp2025120900-168pd2:323
    42tp2025120900-168pd2:525
    52tp2025120900-168pd2:10210
    62tp2025120900-168pd3:333
    72tp2025120900-168pd3:535
    82tp2025120900-168pd3:10310
    92tp2025120900-168pd4:545
    102tp2025120900-168pd4:10410
    112tp2025120900-168pd5:555
    122tp2025120900-168pd5:10510
    132tp2025120900-168pd6:10610
    142tp2025120900-168pd7:10710
    152tp2025120900-168pd8:10810
    162tp2025120900-168pd9:10910
    172tp2025120900-168pd10:101010
    \n", + "" ], "text/plain": [ - " Size: 22kB\n", - "Dimensions: (forecast_reference_time: 2, latitude: 33,\n", - " longitude: 41)\n", - "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 201...\n", - " step (forecast_reference_time) timedelta64[ns] 16B 30...\n", - " * latitude (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n", - " * longitude (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n", - "Data variables:\n", - " avg_2t (forecast_reference_time, latitude, longitude) float64 22kB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" + " metadata.shortName metadata.dataDate metadata.dataTime \\\n", + "0 2tp 20251209 0 \n", + "1 2tp 20251209 0 \n", + "2 2tp 20251209 0 \n", + "3 2tp 20251209 0 \n", + "4 2tp 20251209 0 \n", + "5 2tp 20251209 0 \n", + "6 2tp 20251209 0 \n", + "7 2tp 20251209 0 \n", + "8 2tp 20251209 0 \n", + "9 2tp 20251209 0 \n", + "10 2tp 20251209 0 \n", + "11 2tp 20251209 0 \n", + "12 2tp 20251209 0 \n", + "13 2tp 20251209 0 \n", + "14 2tp 20251209 0 \n", + "15 2tp 20251209 0 \n", + "16 2tp 20251209 0 \n", + "17 2tp 20251209 0 \n", + "\n", + " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", + "0 0-168 pd 1:3 1 \n", + "1 0-168 pd 1:5 1 \n", + "2 0-168 pd 1:10 1 \n", + "3 0-168 pd 2:3 2 \n", + "4 0-168 pd 2:5 2 \n", + "5 0-168 pd 2:10 2 \n", + "6 0-168 pd 3:3 3 \n", + "7 0-168 pd 3:5 3 \n", + "8 0-168 pd 3:10 3 \n", + "9 0-168 pd 4:5 4 \n", + "10 0-168 pd 4:10 4 \n", + "11 0-168 pd 5:5 5 \n", + "12 0-168 pd 5:10 5 \n", + "13 0-168 pd 6:10 6 \n", + "14 0-168 pd 7:10 7 \n", + "15 0-168 pd 8:10 8 \n", + "16 0-168 pd 9:10 9 \n", + "17 0-168 pd 10:10 10 \n", + "\n", + " metadata.numberOfForecastsInEnsemble \n", + "0 3 \n", + "1 5 \n", + "2 10 \n", + "3 3 \n", + "4 5 \n", + "5 10 \n", + "6 3 \n", + "7 5 \n", + "8 10 \n", + "9 5 \n", + "10 10 \n", + "11 5 \n", + "12 10 \n", + "13 10 \n", + "14 10 \n", + "15 10 \n", + "16 10 \n", + "17 10 " ] }, - 
"execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds5 = ds_fl3.to_xarray(\n", - " drop_dims=\"step\",\n", - " aux_coords={\"step\": (\"time.step\", (\"forecast_reference_time\",))},\n", - ")\n", - "ds5.load()" + "ds_fl2.ls(\n", + " keys=[\n", + " \"metadata.shortName\",\n", + " \"metadata.dataDate\",\n", + " \"metadata.dataTime\",\n", + " \"metadata.stepRange\",\n", + " \"metadata.dataType\",\n", + " \"metadata.quantile\",\n", + " \"metadata.number\",\n", + " \"metadata.numberOfForecastsInEnsemble\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", + "metadata": {}, + "source": [ + "Note that, in this context, the usual meaning of the GRIB metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", + "\n", + "For this reason, we must:\n", + "- declare the GRIB metadata key ``\"quantile\"`` as an extra dimension, and\n", + "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", + "\n", + "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` as auxiliary coordinates." ] }, { "cell_type": "code", - "execution_count": 30, - "id": "aebe01e5-0a1f-4871-82d8-9baa2fb4272d", + "execution_count": 6, + "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", "metadata": {}, "outputs": [ { @@ -4512,88 +1778,101 @@ " filter: drop-shadow(1px 1px 5px var(--xr-font-color2));\n", " stroke-width: 0.8px;\n", "}\n", - "
    <xarray.Dataset> Size: 22kB\n",
    -       "Dimensions:     (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n",
    +       "
    <xarray.Dataset> Size: 13kB\n",
    +       "Dimensions:        (quantile: 18, latitude: 7, longitude: 12)\n",
            "Coordinates:\n",
    -       "  * s           (s) <U4 16B '0001'\n",
    -       "  * valid_time  (valid_time) datetime64[ns] 16B 2010-07-01 2010-08-01\n",
    -       "  * latitude    (latitude) float64 264B 55.0 54.75 54.5 ... 47.5 47.25 47.0\n",
    -       "  * longitude   (longitude) float64 328B 5.0 5.25 5.5 5.75 ... 14.5 14.75 15.0\n",
    +       "  * quantile       (quantile) <U5 360B '10:10' '1:10' '1:3' ... '8:10' '9:10'\n",
    +       "    quantile_rank  (quantile) <U2 144B '10' '1' '1' '1' '2' ... '6' '7' '8' '9'\n",
    +       "    nquantiles     (quantile) int64 144B 10 10 3 5 10 3 5 ... 5 10 5 10 10 10 10\n",
    +       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
            "Data variables:\n",
    -       "    avg_2t      (s, valid_time, latitude, longitude) float64 22kB 284.2 ... 2...\n",
    +       "    2tp            (quantile, latitude, longitude) float64 12kB 13.37 ... 0.0\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " institution: ECMWF
    " ], "text/plain": [ - " Size: 22kB\n", - "Dimensions: (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n", + " Size: 13kB\n", + "Dimensions: (quantile: 18, latitude: 7, longitude: 12)\n", "Coordinates:\n", - " * s (s) Date: Thu, 23 Apr 2026 12:12:55 +0200 Subject: [PATCH 5/7] Docstrings on `aux_coord` improved --- src/earthkit/data/indexing/xarray.py | 2 +- src/earthkit/data/xr_engine/engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/earthkit/data/indexing/xarray.py b/src/earthkit/data/indexing/xarray.py index 85d0c07a8..32b46ea15 100644 --- a/src/earthkit/data/indexing/xarray.py +++ b/src/earthkit/data/indexing/xarray.py @@ -255,7 +255,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs overwrites it. * aux_coords: dict, None Mapping from an auxiliary coordinate label to a tuple: - (metadata key, the dataset dimension(s)). The default value is None. + ``(metadata_key: str, dataset_dimension(s): str or iterable of str)``. The default value is None. * add_geo_coords: bool, None Add geographic coordinates to the dataset when field values are represented by a single "values" dimension. Its default value (None) expands diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index b95f7f613..97d8e3974 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -259,7 +259,7 @@ def open_dataset( overwrites it. aux_coords: dict, None Mapping from an auxiliary coordinate label to a tuple: - (metadata key, the dataset dimension(s)). The default value is None. + ``(metadata_key: str, dataset_dimension(s): str or iterable of str)``. The default value is None. add_geo_coords: bool, None If True, add geographic coordinates to the dataset when field values are represented by a single "values" dimension. 
Its default value (None) expands From dbe4f2950302668c115bf24a27c5969a28e1724b Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 22 Apr 2026 23:28:06 +0200 Subject: [PATCH 6/7] `add_valid_time_coord=True` implemented via `aux_coords` --- src/earthkit/data/indexing/tensor.py | 94 -------------------------- src/earthkit/data/xr_engine/builder.py | 27 +++----- src/earthkit/data/xr_engine/coord.py | 4 +- src/earthkit/data/xr_engine/dim.py | 27 ++++++++ 4 files changed, 40 insertions(+), 112 deletions(-) diff --git a/src/earthkit/data/indexing/tensor.py b/src/earthkit/data/indexing/tensor.py index 18fd2355b..91b412d3f 100644 --- a/src/earthkit/data/indexing/tensor.py +++ b/src/earthkit/data/indexing/tensor.py @@ -505,100 +505,6 @@ def _subset(self, indexes): ds = self.source[tuple(dataset_indexes)] return self.from_tensor(self, ds, coords) - def make_valid_datetime(self, dims_map, dtype="datetime64[ns]"): - # TODO: make it more general - # PW: TODO: make it more general - it could allow to use it when allow_holes=True - - for k in ["valid_time", "time.valid_datetime", "metadata.valid_time", "metadata.valid_datetime"]: - if k in self.user_coords: - import datetime - - return (k,), [datetime.datetime.fromisoformat(x) for x in self.user_coords[k]] - - # in the tensor the dims.coords are GRIB keys - # dims_map is a mapping from dim names to GRIB keys - DIM_ROLES = { - "forecast_reference_time": ( - "forecast_reference_time", - "time.forecast_reference_time", - "time.base_datetime", - "metadata.base_datetime", - "metadata.indexing_datetime", - "metadata.indexing_time", - ), - "step": ( - "step", - "time.step", - "metadata.step_timedelta", - "metadata.step", - "metadata.endStep", - "metadata.stepRange", - ), - "date": ("date", "metadata.dataDate"), - "time": ("time", "metadata.dataTime"), - } - - # map dim roles to keys available in the tensor - keys = {} - for k in DIM_ROLES: - for d in dims_map: - if d.name == k: - keys[k] = d.key - break - if k not in keys: - for d in 
self.user_dims: - if d in DIM_ROLES[k]: - keys[k] = d - break - - DIM_COMBINATIONS = [ - ["forecast_reference_time", "step"], - ["forecast_reference_time"], - ["date", "time", "step"], - ["date", "time"], - ["date", "step"], - ["time", "step"], - ["step"], - ] - - for dims in DIM_COMBINATIONS: - if all(d in keys for d in dims): - dims_step = [keys[d] for d in dims] - # use same dim order as in user_dims - dims = [d for d in self.user_dims if d in dims_step] - if len(dims) != len(dims_step): - continue - assert len(dims) == len(dims_step), f"{dims=} {dims_step=}" - other_dims = [d for d in self.user_dims if d not in dims] - - if other_dims: - import datetime - - import numpy as np - - other_coords = {k: next(iter(self.user_coords[k])) for k in other_dims if k in self.user_coords} - - vals = np.array( - [x for x in self.source.sel(**other_coords).get("time.valid_datetime")], - dtype=dtype, - ) - - shape = tuple([self.user_dims[d] for d in dims]) - return tuple(dims), vals.reshape(shape) - else: - import datetime - - import numpy as np - - vals = np.array( - [x for x in self.source.get("time.valid_datetime")], - dtype=dtype, - ) - - shape = tuple([self.user_dims[d] for d in dims]) - return tuple(dims), vals.reshape(shape) - return None, None - def __getstate__(self): r = {} r["source"] = self.source diff --git a/src/earthkit/data/xr_engine/builder.py b/src/earthkit/data/xr_engine/builder.py index 09e018faf..5fcb90926 100644 --- a/src/earthkit/data/xr_engine/builder.py +++ b/src/earthkit/data/xr_engine/builder.py @@ -337,19 +337,6 @@ def _make_field_coords(self): r[k] = xarray.Variable(dims, v, self.profile.attrs.coord_attrs.get(k, {})) return r - def collect_date_coords(self, tensor): - if ( - self.profile.add_valid_time_coord - and "valid_time" not in tensor.user_dims - and "valid_datetime" not in tensor.user_coords - and "valid_time" not in self.tensor_coords - ): - from .coord import Coord - - _dims, _vals = tensor.make_valid_datetime(self.dims) - if _dims is not 
None and _vals is not None: - self.tensor_coords["valid_time"] = Coord.make("valid_time", _vals, dims=_dims) - def collect_aux_coords(self): from .coord import Coord @@ -408,9 +395,6 @@ def collect_aux_coords(self): def build(self): if self.profile.allow_holes: - if self.profile.add_valid_time_coord: - raise NotImplementedError("add_valid_time_coord=True not yet supported when allow_holes=True") - global_tensor_dims, self.raw_global_tensor_coords, _ = self.prepare_tensor( self.ds, self.dims, "" ) @@ -433,6 +417,16 @@ def build(self): # From now on, self.tensor_coords is a mapping: # dimension_name->a Coord object + possibly the same for "valid_time" + # Inject valid_time as an auxiliary coordinate when requested and when valid_time is not a dimension + if ( + self.profile.add_valid_time_coord + and "valid_time" not in [d.name for d in self.dims] + and "time.valid_datetime" not in self.tensor_coords + ): + time_dim_names = self.profile.dims.active_time_dim_names + if time_dim_names: + self.profile.aux_coords.setdefault("valid_time", ("time.valid_datetime", time_dim_names)) + self.collect_aux_coords() # build variable and global attributes @@ -518,7 +512,6 @@ def pre_build_variable(self, ds_var, dims, name): var_dims.append(k) var_dims.extend(tensor.field_dims) - self.collect_date_coords(tensor) data_maker = self.build_values remapping = self.profile.remapping.build() diff --git a/src/earthkit/data/xr_engine/coord.py b/src/earthkit/data/xr_engine/coord.py index 999fbf565..dfda7463f 100644 --- a/src/earthkit/data/xr_engine/coord.py +++ b/src/earthkit/data/xr_engine/coord.py @@ -83,7 +83,9 @@ def _to_datetime_list(vals): # datetime64 arrays are already in the required format if isinstance(vals, np.ndarray): if not np.issubdtype(vals.dtype, np.datetime64): - return to_datetime_list(vals.tolist()) + original_shape = vals.shape + flat = to_datetime_list(vals.flatten().tolist()) + return np.array(flat, dtype="datetime64[ns]").reshape(original_shape) else: return 
to_datetime_list(vals) diff --git a/src/earthkit/data/xr_engine/dim.py b/src/earthkit/data/xr_engine/dim.py index 714c839f7..c29c3e000 100644 --- a/src/earthkit/data/xr_engine/dim.py +++ b/src/earthkit/data/xr_engine/dim.py @@ -98,6 +98,8 @@ def _get_metadata_keys(keys): DATETIME_KEYS = BASE_DATETIME_KEYS + VALID_DATETIME_KEYS +_TIME_RELATED_KEYS = set(DATE_KEYS + TIME_KEYS + STEP_KEYS + MONTH_KEYS + VALID_DATETIME_KEYS + BASE_DATETIME_KEYS) + KEYS = ( ENS_KEYS, LEVEL_KEYS, @@ -1015,6 +1017,31 @@ def make_coords(self): def to_list(self): return list(self.dims.values()) + @property + def active_time_dim_names(self): + """Return the names of the active time dimensions in dim order. + + Handles both the normal case (dims built via ``TimeDimBuilder``) and + the ``fixed_dims`` case where dim names are raw metadata keys. + """ + time_dim_names = set() + if not self.fixed_dims: + for role_name in self.time_dims: + if role_name in ALL_TIME_ROLES: + # Add the role-resolved name and key + _, name = self.dim_roles.role(role_name, raise_error=False) + if name is not None: + time_dim_names.add(name) + else: + # When fixed_dims are used, `self.time_dims` is irrelevant, and we check all `self.fixed_dims` + # for time-related keys. 
+ for dim_name, dim_key in self.fixed_dims.items(): + if dim_key in _TIME_RELATED_KEYS: + time_dim_names.add(dim_name) + + # Return in dim order + return [d.name for d in self.dims.values() if d.active and d.name in time_dim_names] + def get_dims(self, names): r = [] for name in names: From 2601996fe7cb641ad3364feb844fad268a7180ef Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Thu, 23 Apr 2026 00:02:32 +0200 Subject: [PATCH 7/7] Tests added --- .../test_xr_engine_add_valid_time_coord.py | 261 ++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 tests/xr_engine/test_xr_engine_add_valid_time_coord.py diff --git a/tests/xr_engine/test_xr_engine_add_valid_time_coord.py b/tests/xr_engine/test_xr_engine_add_valid_time_coord.py new file mode 100644 index 000000000..5438e12c0 --- /dev/null +++ b/tests/xr_engine/test_xr_engine_add_valid_time_coord.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +"""Tests for add_valid_time_coord=True using the aux_coords-based implementation.""" + +import numpy as np +import pytest + +from earthkit.data import from_source + +# Expected valid_time values for pl.grib with 4 forecast_reference_times x 2 steps +VALID_TIME_FRT_STEP = np.array( + [ + ["2024-06-03T00:00:00", "2024-06-03T06:00:00"], + ["2024-06-03T12:00:00", "2024-06-03T18:00:00"], + ["2024-06-04T00:00:00", "2024-06-04T06:00:00"], + ["2024-06-04T12:00:00", "2024-06-04T18:00:00"], + ], + dtype="datetime64[ns]", +) + +# Expected valid_time values for date x time x step (2x2x2) +VALID_TIME_DATE_TIME_STEP = np.array( + [ + [ + ["2024-06-03T00:00:00", "2024-06-03T06:00:00"], + ["2024-06-03T12:00:00", "2024-06-03T18:00:00"], + ], + [ + ["2024-06-04T00:00:00", "2024-06-04T06:00:00"], + ["2024-06-04T12:00:00", "2024-06-04T18:00:00"], + ], + ], + dtype="datetime64[ns]", +) + + +@pytest.fixture(scope="session") +def pl_fl(): + return from_source("sample", "pl.grib").to_fieldlist() + + +# ------------------------------------------------------------------------- +# dim_name_from_role_name=True vs False +# ------------------------------------------------------------------------- + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +@pytest.mark.parametrize("dim_name_from_role_name", [True, False]) +def test_dim_name_from_role_name(pl_fl, lazy_load, allow_holes, dim_name_from_role_name): + """valid_time aux coord should work regardless of dim_name_from_role_name.""" + ds = pl_fl.to_xarray( + profile="earthkit", + add_valid_time_coord=True, + dim_name_from_role_name=dim_name_from_role_name, + lazy_load=lazy_load, + allow_holes=allow_holes, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ds.coords["valid_time"].dims == ("forecast_reference_time", "step") + assert ds.coords["valid_time"].shape == (4, 2) + np.testing.assert_array_equal(ds.coords["valid_time"].values, 
VALID_TIME_FRT_STEP) + + +# ------------------------------------------------------------------------- +# Different time_dims variants +# ------------------------------------------------------------------------- + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_time_dims_date_time_step(pl_fl, lazy_load, allow_holes): + """time_dims=['date', 'time', 'step'] produces 3D valid_time.""" + ds = pl_fl.to_xarray( + profile="earthkit", + time_dims=["date", "time", "step"], + add_valid_time_coord=True, + lazy_load=lazy_load, + allow_holes=allow_holes, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ds.coords["valid_time"].dims == ("date", "time", "step") + assert ds.coords["valid_time"].shape == (2, 2, 2) + np.testing.assert_array_equal(ds.coords["valid_time"].values, VALID_TIME_DATE_TIME_STEP) + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_time_dims_valid_time_no_aux(pl_fl, lazy_load, allow_holes): + """When time_dims='valid_time', valid_time is a dimension, not an aux coord.""" + ds = pl_fl.to_xarray( + profile="earthkit", + time_dims="valid_time", + add_valid_time_coord=True, + lazy_load=lazy_load, + allow_holes=allow_holes, + ) + + # valid_time should be a dimension, not an auxiliary coordinate + assert "valid_time" in ds.sizes + assert ds.sizes["valid_time"] == 8 + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_time_dims_frt_only(pl_fl, lazy_load, allow_holes): + """time_dims=['forecast_reference_time'] with step squeezed out => 1D valid_time.""" + # Select single step to avoid step dimension + fl_single_step = pl_fl.sel({"metadata.step": 0}) + ds = fl_single_step.to_xarray( + profile="earthkit", + time_dims=["forecast_reference_time"], + add_valid_time_coord=True, + lazy_load=lazy_load, + 
allow_holes=allow_holes, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ds.coords["valid_time"].dims == ("forecast_reference_time",) + assert ds.coords["valid_time"].shape == (4,) + + +# ------------------------------------------------------------------------- +# Custom dim_roles (GRIB metadata keys) +# ------------------------------------------------------------------------- + + +@pytest.mark.parametrize("dim_name_from_role_name", [True, False]) +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_custom_dim_roles(pl_fl, lazy_load, allow_holes, dim_name_from_role_name): + """Custom dim_roles mapping time roles to GRIB metadata keys.""" + ds = pl_fl.to_xarray( + profile="earthkit", + add_valid_time_coord=True, + dim_roles={ + "forecast_reference_time": "metadata.base_datetime", + "step": "metadata.endStep", + }, + lazy_load=lazy_load, + allow_holes=allow_holes, + dim_name_from_role_name=dim_name_from_role_name, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ( + ds.coords["valid_time"].dims == ("forecast_reference_time", "step") + if dim_name_from_role_name + else ("metadata.base_datetime", "metadata.endStep") + ) + assert ds.coords["valid_time"].shape == (4, 2) + np.testing.assert_array_equal(ds.coords["valid_time"].values, VALID_TIME_FRT_STEP) + + +# ------------------------------------------------------------------------- +# fixed_dims with mono_variable=True and False +# ------------------------------------------------------------------------- + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_fixed_dims_mono_variable_true(pl_fl, lazy_load, allow_holes): + """fixed_dims with mono_variable=True.""" + ds = pl_fl.to_xarray( + fixed_dims=[ + "parameter.variable", + "time.forecast_reference_time", + "time.step", + "vertical.level", + ], + 
mono_variable=True, + add_valid_time_coord=True, + lazy_load=lazy_load, + allow_holes=allow_holes, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ds.coords["valid_time"].dims == ("forecast_reference_time", "step") + assert ds.coords["valid_time"].shape == (4, 2) + np.testing.assert_array_equal(ds.coords["valid_time"].values, VALID_TIME_FRT_STEP) + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_fixed_dims_mono_variable_false(pl_fl, lazy_load, allow_holes): + """fixed_dims with mono_variable=False (default).""" + ds = pl_fl.to_xarray( + fixed_dims=[ + "time.forecast_reference_time", + "metadata.endStep", + "vertical.level", + ], + mono_variable=False, + add_valid_time_coord=True, + lazy_load=lazy_load, + allow_holes=allow_holes, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + assert ds.coords["valid_time"].dims == ("forecast_reference_time", "endStep") + assert ds.coords["valid_time"].shape == (4, 2) + np.testing.assert_array_equal(ds.coords["valid_time"].values, VALID_TIME_FRT_STEP) + + +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_fixed_dims_different_order(pl_fl, lazy_load, allow_holes): + """fixed_dims with time dims in reversed order.""" + ds = pl_fl.to_xarray( + fixed_dims=[ + "vertical.level", + "metadata.endStep", + "time.forecast_reference_time", + ], + add_valid_time_coord=True, + lazy_load=lazy_load, + allow_holes=allow_holes, + decode_times=False, + decode_timedelta=False, + ) + + assert "valid_time" in ds.coords + assert "valid_time" not in ds.sizes + # Dims should follow the fixed_dims order + assert ds.coords["valid_time"].dims == ("endStep", "forecast_reference_time") + assert ds.coords["valid_time"].shape == (2, 4) + + +# ------------------------------------------------------------------------- +# Edge case: 
add_valid_time_coord=False should not add it +# ------------------------------------------------------------------------- + + +@pytest.mark.parametrize("allow_holes", [False, True]) +def test_add_valid_time_coord_false(pl_fl, allow_holes): + """add_valid_time_coord=False should not add valid_time as aux coord.""" + ds = pl_fl.to_xarray( + profile="earthkit", + add_valid_time_coord=False, + allow_holes=allow_holes, + decode_times=False, + ) + + assert "valid_time" not in ds.coords