From ed6bb13fa410f53f34c0083cbdaa499251f38c23 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 17 Apr 2026 15:14:46 +0200 Subject: [PATCH 1/5] Auxiliary coordinates feature --- docs/source/how-tos/xr_engine/index.rst | 1 + .../xr_engine/xarray_engine_aux_coords.ipynb | 4623 +++++++++++++++++ src/earthkit/data/indexing/xarray.py | 7 +- src/earthkit/data/xr_engine/builder.py | 62 + src/earthkit/data/xr_engine/engine.py | 8 +- src/earthkit/data/xr_engine/profile.py | 18 +- 6 files changed, 4714 insertions(+), 5 deletions(-) create mode 100644 docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb diff --git a/docs/source/how-tos/xr_engine/index.rst b/docs/source/how-tos/xr_engine/index.rst index 169f4f773..6a5311bd2 100644 --- a/docs/source/how-tos/xr_engine/index.rst +++ b/docs/source/how-tos/xr_engine/index.rst @@ -23,6 +23,7 @@ Xarray engine xarray_engine_dims_as_attrs.ipynb xarray_engine_extra_dims.ipynb xarray_engine_remapping.ipynb + xarray_engine_aux_coords.ipynb xarray_engine_holes.ipynb xarray_engine_chunks.ipynb xarray_engine_chunks_on_dask_cluster.ipynb diff --git a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb new file mode 100644 index 000000000..a14f9278b --- /dev/null +++ b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb @@ -0,0 +1,4623 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2feafcc-430b-4718-983f-554e55dcd54a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: auxiliary coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "f557ebaf-0c93-4eab-8523-8e38b931c054", + 
"metadata": {}, + "source": [ + "### Basic examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fc6598cb-5516-424d-ba01-a8ee22a120cc", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2024-06-03 00:00:002024-06-030 days 00:00:00700pressure0regular_ll
1r2024-06-03 00:00:002024-06-030 days 00:00:00700pressure0regular_ll
2t2024-06-03 00:00:002024-06-030 days 00:00:00500pressure0regular_ll
3r2024-06-03 00:00:002024-06-030 days 00:00:00500pressure0regular_ll
4t2024-06-03 06:00:002024-06-030 days 06:00:00700pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "1 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "2 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "3 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "4 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 700 pressure 0 regular_ll \n", + "1 700 pressure 0 regular_ll \n", + "2 500 pressure 0 regular_ll \n", + "3 500 pressure 0 regular_ll \n", + "4 700 pressure 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl = ekd.from_source(\"sample\", \"pl.grib\").to_fieldlist()\n", + "ds_fl.ls().head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8856dcff-31ec-4c39-8725-a6f5e37e1065", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "    expver                   (forecast_reference_time) <U4 64B '0001' ... '0001'\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " expver (forecast_reference_time) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " centre_and_expver (forecast_reference_time, step) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (variable: 2, forecast_reference_time: 4, step: 2,\n",
+       "                              level: 2, latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * variable                 (variable) <U1 8B 'r' 't'\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    data                     (variable, forecast_reference_time, step, level, latitude, longitude) float64 175kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (variable: 2, forecast_reference_time: 4, step: 2,\n", + " level: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * variable (variable) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
02tp2025120900-168pd1:313
12tp2025120900-168pd1:515
22tp2025120900-168pd1:10110
32tp2025120900-168pd2:323
42tp2025120900-168pd2:525
52tp2025120900-168pd2:10210
62tp2025120900-168pd3:333
72tp2025120900-168pd3:535
82tp2025120900-168pd3:10310
92tp2025120900-168pd4:545
102tp2025120900-168pd4:10410
112tp2025120900-168pd5:555
122tp2025120900-168pd5:10510
132tp2025120900-168pd6:10610
142tp2025120900-168pd7:10710
152tp2025120900-168pd8:10810
162tp2025120900-168pd9:10910
172tp2025120900-168pd10:101010
\n", + "" + ], + "text/plain": [ + " metadata.shortName metadata.dataDate metadata.dataTime \\\n", + "0 2tp 20251209 0 \n", + "1 2tp 20251209 0 \n", + "2 2tp 20251209 0 \n", + "3 2tp 20251209 0 \n", + "4 2tp 20251209 0 \n", + "5 2tp 20251209 0 \n", + "6 2tp 20251209 0 \n", + "7 2tp 20251209 0 \n", + "8 2tp 20251209 0 \n", + "9 2tp 20251209 0 \n", + "10 2tp 20251209 0 \n", + "11 2tp 20251209 0 \n", + "12 2tp 20251209 0 \n", + "13 2tp 20251209 0 \n", + "14 2tp 20251209 0 \n", + "15 2tp 20251209 0 \n", + "16 2tp 20251209 0 \n", + "17 2tp 20251209 0 \n", + "\n", + " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", + "0 0-168 pd 1:3 1 \n", + "1 0-168 pd 1:5 1 \n", + "2 0-168 pd 1:10 1 \n", + "3 0-168 pd 2:3 2 \n", + "4 0-168 pd 2:5 2 \n", + "5 0-168 pd 2:10 2 \n", + "6 0-168 pd 3:3 3 \n", + "7 0-168 pd 3:5 3 \n", + "8 0-168 pd 3:10 3 \n", + "9 0-168 pd 4:5 4 \n", + "10 0-168 pd 4:10 4 \n", + "11 0-168 pd 5:5 5 \n", + "12 0-168 pd 5:10 5 \n", + "13 0-168 pd 6:10 6 \n", + "14 0-168 pd 7:10 7 \n", + "15 0-168 pd 8:10 8 \n", + "16 0-168 pd 9:10 9 \n", + "17 0-168 pd 10:10 10 \n", + "\n", + " metadata.numberOfForecastsInEnsemble \n", + "0 3 \n", + "1 5 \n", + "2 10 \n", + "3 3 \n", + "4 5 \n", + "5 10 \n", + "6 3 \n", + "7 5 \n", + "8 10 \n", + "9 5 \n", + "10 10 \n", + "11 5 \n", + "12 10 \n", + "13 10 \n", + "14 10 \n", + "15 10 \n", + "16 10 \n", + "17 10 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl2.ls(\n", + " keys=[\n", + " \"metadata.shortName\",\n", + " \"metadata.dataDate\",\n", + " \"metadata.dataTime\",\n", + " \"metadata.stepRange\",\n", + " \"metadata.dataType\",\n", + " \"metadata.quantile\",\n", + " \"metadata.number\",\n", + " \"metadata.numberOfForecastsInEnsemble\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", + "metadata": {}, + "source": [ + "Note that, in this context, the usual meaning of 
the metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", + "\n", + "For this reason, we must:\n", + "- declare ``\"quantile\"`` as an extra dimension, and\n", + "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", + "\n", + "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` as auxiliary coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 13kB\n",
+       "Dimensions:                  (quantile: 18, forecast_reference_time: 1,\n",
+       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
+       "                              longitude: 12)\n",
+       "Coordinates:\n",
+       "  * quantile                 (quantile) <U5 360B '10:10' '1:10' ... '9:10'\n",
+       "    quantile_rank            (quantile) <U2 144B '10' '1' '1' ... '7' '8' '9'\n",
+       "    nquantiles               (quantile) int64 144B 10 10 3 5 10 ... 10 10 10 10\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 8B 2025...\n",
+       "  * step                     (step) timedelta64[ns] 8B 7 days\n",
+       "  * level                    (level) int64 8B 0\n",
+       "  * level_type               (level_type) <U7 28B 'surface'\n",
+       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
+       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
+       "Data variables:\n",
+       "    2tp                      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 13kB\n", + "Dimensions: (quantile: 18, forecast_reference_time: 1,\n", + " step: 1, level: 1, level_type: 1, latitude: 7,\n", + " longitude: 12)\n", + "Coordinates:\n", + " * quantile (quantile) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0avg_2t2010-07-012010-06-0130 days2height_above_ground_level0regular_ll
1avg_2t2010-08-012010-07-0131 days2height_above_ground_level0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 avg_2t 2010-07-01 2010-06-01 30 days \n", + "1 avg_2t 2010-08-01 2010-07-01 31 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member \\\n", + "0 2 height_above_ground_level 0 \n", + "1 2 height_above_ground_level 0 \n", + "\n", + " geography.grid_type \n", + "0 regular_ll \n", + "1 regular_ll " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl3 = ekd.from_source(\"file\", \"/Users/ecm8620/data/issue-948-avg_2t-2months.grib2\").to_fieldlist()\n", + "ds_fl3.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fdcac0e0-f501-4cd5-bf2b-59c9d760d8e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 22kB\n",
+       "Dimensions:                  (forecast_reference_time: 2, latitude: 33,\n",
+       "                              longitude: 41)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 201...\n",
+       "    step                     (forecast_reference_time) timedelta64[ns] 16B 30...\n",
+       "  * latitude                 (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n",
+       "  * longitude                (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n",
+       "Data variables:\n",
+       "    avg_2t                   (forecast_reference_time, latitude, longitude) float64 22kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 22kB\n", + "Dimensions: (forecast_reference_time: 2, latitude: 33,\n", + " longitude: 41)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 201...\n", + " step (forecast_reference_time) timedelta64[ns] 16B 30...\n", + " * latitude (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n", + " * longitude (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n", + "Data variables:\n", + " avg_2t (forecast_reference_time, latitude, longitude) float64 22kB ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds5 = ds_fl3.to_xarray(\n", + " drop_dims=\"step\",\n", + " aux_coords={\"step\": (\"time.step\", (\"forecast_reference_time\",))},\n", + ")\n", + "ds5.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "aebe01e5-0a1f-4871-82d8-9baa2fb4272d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 22kB\n",
+       "Dimensions:     (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n",
+       "Coordinates:\n",
+       "  * s           (s) <U4 16B '0001'\n",
+       "  * valid_time  (valid_time) datetime64[ns] 16B 2010-07-01 2010-08-01\n",
+       "  * latitude    (latitude) float64 264B 55.0 54.75 54.5 ... 47.5 47.25 47.0\n",
+       "  * longitude   (longitude) float64 328B 5.0 5.25 5.5 5.75 ... 14.5 14.75 15.0\n",
+       "Data variables:\n",
+       "    avg_2t      (s, valid_time, latitude, longitude) float64 22kB 284.2 ... 2...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 22kB\n", + "Dimensions: (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n", + "Coordinates:\n", + " * s (s) a Coord object + possibly the same for "valid_time" + + self.collect_aux_coords() + # build variable and global attributes xr_attrs = self.profile.attrs.builder.build(self.ds, var_builders, rename=True) xr_coords = self.coords() diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index dbd788c8e..009170199 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -39,6 +39,7 @@ def open_dataset( add_valid_time_coord=None, decode_times=None, decode_timedelta=None, + aux_coords=None, add_geo_coords=None, attrs_mode=None, attrs=None, @@ -256,6 +257,9 @@ def open_dataset( will have the attribute "units" appropriately set (to "minutes", "hours", etc.). If None (default), assume the same value of ``decode_times`` unless the ``profile`` overwrites it. + aux_coords: dict, None + Mapping from an auxiliary coordinate label metadata keys to a tuple: + (metadata key, the dataset dimension(s)). The default value is None. add_geo_coords: bool, None If True, add geographic coordinates to the dataset when field values are represented by a single "values" dimension. Its default value (None) expands @@ -312,8 +316,8 @@ def open_dataset( Define fill values to metadata keys. Default is None. remapping: dict, None Define new metadata keys for indexing. Any key provided in ``remapping`` may be referenced - when specifying options such as ``variable_key``, ``extra_dims``, ``ensure_dims``, and others. - Default is None. + when specifying options such as ``variable_key``, ``extra_dims``, ``ensure_dims``, ``aux_coords`` + and others. Default is None. lazy_load: bool, None If True, the resulting Dataset will load data lazily from the underlying data source. 
If False, a DataSet holding all the data in memory diff --git a/src/earthkit/data/xr_engine/profile.py b/src/earthkit/data/xr_engine/profile.py index 459e08523..93738a190 100644 --- a/src/earthkit/data/xr_engine/profile.py +++ b/src/earthkit/data/xr_engine/profile.py @@ -40,6 +40,21 @@ def add(self, remapping, patch=None): self.patch.update(patch) +class AuxCoords(dict): + def __init__(self, aux_coords): + super().__init__() + for coord_label, key_dims in ensure_dict(aux_coords).items(): + try: + key, dims = key_dims + dims = ensure_iterable(dims) + except Exception: + raise ValueError( + f"Auxiliary coordinate {coord_label} has invalid specification: got {key_dims} " + f"while a tuple (, ) is expected" + ) + self[coord_label] = (key, dims) + + class ProfileConf: def __init__(self): self._conf = {} @@ -184,7 +199,7 @@ def check(self, profile): class Profile: - USER_ONLY_OPTIONS = ["remapping", "patch", "fill_metadata"] + USER_ONLY_OPTIONS = ["remapping", "patch", "fill_metadata", "aux_coords"] DEFAULT_PROFILE_NAME = "earthkit" def __init__( @@ -213,6 +228,7 @@ def __init__( patch[k] = v self.remapping = RemappingBuilder(kwargs.pop("remapping", None), patch) + self.aux_coords = AuxCoords(kwargs.pop("aux_coords", None)) # variables mono_variable = kwargs.pop("mono_variable") From bd10f88dbaee3534faacd545ad511669402e176a Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 21 Apr 2026 23:43:40 +0200 Subject: [PATCH 2/5] ListDiff._compare improved and now provides a sensible error when data do not form a hypercube Tests added --- src/earthkit/data/xr_engine/diff.py | 17 +- tests/xr_engine/test_xr_engine_aux_coords.py | 177 +++++++++++++++++++ 2 files changed, 179 insertions(+), 15 deletions(-) create mode 100644 tests/xr_engine/test_xr_engine_aux_coords.py diff --git a/src/earthkit/data/xr_engine/diff.py b/src/earthkit/data/xr_engine/diff.py index 7ca07cf97..f6436b879 100644 --- a/src/earthkit/data/xr_engine/diff.py +++ b/src/earthkit/data/xr_engine/diff.py @@ -7,7 
+7,6 @@ # nor does it submit to any jurisdiction. # -import datetime import logging import math @@ -79,26 +78,14 @@ class ListDiff: @staticmethod def _compare(v1, v2): - if isinstance(v1, int) and isinstance(v2, int): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, float) and isinstance(v2, float): + if isinstance(v1, float) and isinstance(v2, float): return math.isclose(v1, v2, rel_tol=1e-9), ListDiff.VALUE_DIFF - elif isinstance(v1, str) and isinstance(v2, str): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.datetime) and isinstance(v2, datetime.datetime): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.date) and isinstance(v2, datetime.date): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.time) and isinstance(v2, datetime.time): - return v1 == v2, ListDiff.VALUE_DIFF - elif isinstance(v1, datetime.timedelta) and isinstance(v2, datetime.timedelta): - return v1 == v2, ListDiff.VALUE_DIFF elif v1 is None and v2 is None: return True, ListDiff.VALUE_DIFF elif type(v1) is not type(v2): return False, ListDiff.TYPE_DIFF else: - raise ValueError(f"Unsupported type: {type(v1)}") + return v1 == v2, ListDiff.VALUE_DIFF @staticmethod def diff(vals1, vals2, name=str()): diff --git a/tests/xr_engine/test_xr_engine_aux_coords.py b/tests/xr_engine/test_xr_engine_aux_coords.py new file mode 100644 index 000000000..ffcaa7220 --- /dev/null +++ b/tests/xr_engine/test_xr_engine_aux_coords.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import numpy as np +import pytest + +from earthkit.data import from_source +from earthkit.data.utils.testing import earthkit_remote_test_data_file + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_simple(lazy_load): + """aux_coords with a single metadata key mapped to a single dimension.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + aux_coords={"centre": ("metadata.centre", "forecast_reference_time")}, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "centre" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time",) + assert (ds["centre"] == "ecmf").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_multi_dim(lazy_load): + """aux_coords mapped to multiple dimensions.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + aux_coords={"centre": ("metadata.centre", ("forecast_reference_time", "step"))}, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "centre" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time", "step") + assert (ds["centre"] == "ecmf").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_with_remapping(lazy_load): + """aux_coords using a remapped key.""" + ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = ds0.to_xarray( + remapping={"centre_class": "{metadata.centre}_{metadata.class}"}, + aux_coords={"centre_class": ("centre_class", ("forecast_reference_time", "step"))}, + lazy_load=lazy_load, + ) + + assert "centre_class" in ds.coords + assert "centre_class" not in ds.sizes + assert ds["centre_class"].dims == ("forecast_reference_time", "step") + assert 
(ds["centre_class"] == "ecmf_od").all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_multiple_coords(lazy_load): + """Multiple aux_coords specified at once.""" + ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = ds0.to_xarray( + profile="mars", + aux_coords={ + "centre": ("metadata.centre", "forecast_reference_time"), + "class_coord": ("metadata.class", "forecast_reference_time"), + }, + lazy_load=lazy_load, + ) + + assert "centre" in ds.coords + assert "class_coord" in ds.coords + assert "centre" not in ds.sizes + assert "class_coord" not in ds.sizes + assert ds["centre"].dims == ("forecast_reference_time",) + assert ds["class_coord"].dims == ("forecast_reference_time",) + assert (ds["centre"] == "ecmf").all() + assert (ds["class_coord"] == "od").all() + + +@pytest.mark.cache +def test_xr_engine_aux_coords_unknown_dim(): + """aux_coords referencing a non-existent dimension should raise.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + with pytest.raises(AssertionError, match="unknown dimension"): + fl.to_xarray(aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}) + + +def test_xr_engine_aux_coords_invalid_spec(): + """aux_coords with invalid tuple specification should raise ValueError.""" + from earthkit.data.xr_engine.profile import AuxCoords + + with pytest.raises(ValueError, match="invalid specification"): + AuxCoords({"bad": "not_a_tuple"}) + + +@pytest.mark.cache +def test_xr_engine_aux_coords_empty(): + """Empty aux_coords should produce no extra coordinates.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds_no_aux = fl.to_xarray(aux_coords={}) + ds_none = fl.to_xarray() + + assert set(ds_no_aux.coords) == set(ds_none.coords) + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, 
False]) +def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): + """Drop a dimension and re-add it as an auxiliary coordinate.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + + ds = fl.to_xarray( + time_dims="valid_time", + aux_coords={"step": ("time.step", ("valid_time",))}, + lazy_load=lazy_load, + ) + + # step should be a coordinate but not a dimension + assert "step" in ds.coords + assert "step" not in ds.sizes + assert "valid_time" in ds.coords["step"].dims + assert (ds.coords["step"] == np.array([0, 6] * 4, dtype="m8[h]")).all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_with_mono_variable(lazy_load): + """aux_coords combined with mono_variable mode.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() + ds = fl.to_xarray( + fixed_dims=["parameter.variable", "time.forecast_reference_time", "time.step", "vertical.level"], + mono_variable=True, + aux_coords={"metadata_paramId": ("metadata.paramId", "parameter.variable")}, + lazy_load=lazy_load, + ) + assert "metadata_paramId" in ds.coords + assert "metadata_paramId" not in ds.sizes + assert (ds["metadata_paramId"] == [157, 130]).all() + + +@pytest.mark.cache +@pytest.mark.parametrize("lazy_load", [True, False]) +def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): + """With strict=True, conflicting aux_coord values for same dim coords should raise.""" + fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/mixed_pl_ml_small.grib")).to_fieldlist() + + # levtype varies across levels, so mapping it to forecast_reference_time alone + # (which doesn't uniquely determine levtype) should conflict with strict=True + with pytest.raises(AssertionError, match="Conflicting values"): + _ = fl.to_xarray( + strict=True, + level_dim_mode="level_and_type", + aux_coords={"levtype": ("metadata.levtype", 
"forecast_reference_time")}, + lazy_load=lazy_load, + ) + + ds = fl.to_xarray( + strict=True, + level_dim_mode="level_and_type", + aux_coords={"levtype": ("metadata.levtype", "level_and_type")}, + lazy_load=lazy_load, + ) + assert "levtype" in ds.coords + assert "levtype" not in ds.sizes + assert (ds["levtype"] == ["ml", "pl", "pl", "ml"]).all() From 9d9f4a6e64e50ee198abbb9c2bf34a5e1d3b2aab Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 21 Apr 2026 23:56:41 +0200 Subject: [PATCH 3/5] Tests additionally parametrised with allow_holes=False/True --- tests/xr_engine/test_xr_engine_aux_coords.py | 46 ++++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/tests/xr_engine/test_xr_engine_aux_coords.py b/tests/xr_engine/test_xr_engine_aux_coords.py index ffcaa7220..1e05d4c69 100644 --- a/tests/xr_engine/test_xr_engine_aux_coords.py +++ b/tests/xr_engine/test_xr_engine_aux_coords.py @@ -17,13 +17,15 @@ @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_simple(lazy_load): +def test_xr_engine_aux_coords_simple(lazy_load, allow_holes): """aux_coords with a single metadata key mapped to a single dimension.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = fl.to_xarray( aux_coords={"centre": ("metadata.centre", "forecast_reference_time")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -33,13 +35,15 @@ def test_xr_engine_aux_coords_simple(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_multi_dim(lazy_load): +def test_xr_engine_aux_coords_multi_dim(lazy_load, allow_holes): """aux_coords mapped to multiple dimensions.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() 
ds = fl.to_xarray( aux_coords={"centre": ("metadata.centre", ("forecast_reference_time", "step"))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -49,14 +53,16 @@ def test_xr_engine_aux_coords_multi_dim(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_with_remapping(lazy_load): +def test_xr_engine_aux_coords_with_remapping(lazy_load, allow_holes): """aux_coords using a remapped key.""" ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = ds0.to_xarray( remapping={"centre_class": "{metadata.centre}_{metadata.class}"}, aux_coords={"centre_class": ("centre_class", ("forecast_reference_time", "step"))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre_class" in ds.coords @@ -66,8 +72,9 @@ def test_xr_engine_aux_coords_with_remapping(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_multiple_coords(lazy_load): +def test_xr_engine_aux_coords_multiple_coords(lazy_load, allow_holes): """Multiple aux_coords specified at once.""" ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = ds0.to_xarray( @@ -77,6 +84,7 @@ def test_xr_engine_aux_coords_multiple_coords(lazy_load): "class_coord": ("metadata.class", "forecast_reference_time"), }, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "centre" in ds.coords @@ -90,11 +98,15 @@ def test_xr_engine_aux_coords_multiple_coords(lazy_load): @pytest.mark.cache -def test_xr_engine_aux_coords_unknown_dim(): +@pytest.mark.parametrize("allow_holes", [False, True]) +def test_xr_engine_aux_coords_unknown_dim(allow_holes): """aux_coords referencing a non-existent dimension should raise.""" fl = from_source("url", 
earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() with pytest.raises(AssertionError, match="unknown dimension"): - fl.to_xarray(aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}) + fl.to_xarray( + aux_coords={"centre": ("metadata.centre", "nonexistent_dim")}, + allow_holes=allow_holes, + ) def test_xr_engine_aux_coords_invalid_spec(): @@ -106,18 +118,20 @@ def test_xr_engine_aux_coords_invalid_spec(): @pytest.mark.cache -def test_xr_engine_aux_coords_empty(): +@pytest.mark.parametrize("allow_holes", [False, True]) +def test_xr_engine_aux_coords_empty(allow_holes): """Empty aux_coords should produce no extra coordinates.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() - ds_no_aux = fl.to_xarray(aux_coords={}) - ds_none = fl.to_xarray() + ds_no_aux = fl.to_xarray(aux_coords={}, allow_holes=allow_holes) + ds_none = fl.to_xarray(allow_holes=allow_holes) assert set(ds_no_aux.coords) == set(ds_none.coords) @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): +def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load, allow_holes): """Drop a dimension and re-add it as an auxiliary coordinate.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() @@ -125,6 +139,7 @@ def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): time_dims="valid_time", aux_coords={"step": ("time.step", ("valid_time",))}, lazy_load=lazy_load, + allow_holes=allow_holes, ) # step should be a coordinate but not a dimension @@ -135,8 +150,9 @@ def test_xr_engine_aux_coords_drop_dim_as_aux(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_with_mono_variable(lazy_load): +def 
test_xr_engine_aux_coords_with_mono_variable(lazy_load, allow_holes): """aux_coords combined with mono_variable mode.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")).to_fieldlist() ds = fl.to_xarray( @@ -144,6 +160,7 @@ def test_xr_engine_aux_coords_with_mono_variable(lazy_load): mono_variable=True, aux_coords={"metadata_paramId": ("metadata.paramId", "parameter.variable")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "metadata_paramId" in ds.coords assert "metadata_paramId" not in ds.sizes @@ -151,19 +168,19 @@ def test_xr_engine_aux_coords_with_mono_variable(lazy_load): @pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): +def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load, allow_holes): """With strict=True, conflicting aux_coord values for same dim coords should raise.""" fl = from_source("url", earthkit_remote_test_data_file("xr_engine/level/mixed_pl_ml_small.grib")).to_fieldlist() - # levtype varies across levels, so mapping it to forecast_reference_time alone - # (which doesn't uniquely determine levtype) should conflict with strict=True with pytest.raises(AssertionError, match="Conflicting values"): _ = fl.to_xarray( strict=True, level_dim_mode="level_and_type", aux_coords={"levtype": ("metadata.levtype", "forecast_reference_time")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) ds = fl.to_xarray( @@ -171,6 +188,7 @@ def test_xr_engine_aux_coords_conflicting_values_strict(lazy_load): level_dim_mode="level_and_type", aux_coords={"levtype": ("metadata.levtype", "level_and_type")}, lazy_load=lazy_load, + allow_holes=allow_holes, ) assert "levtype" in ds.coords assert "levtype" not in ds.sizes From 0409add5aba31fba8784fb93e2c5260838dbdcb4 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Thu, 23 Apr 2026 11:40:28 +0200 Subject: [PATCH 4/5] How-to 
notebook cleaned Docstrings on `aux_coord` improved Error message on conflicting values for aux coords improved --- .../xr_engine/xarray_engine_aux_coords.ipynb | 3843 +++-------------- src/earthkit/data/indexing/xarray.py | 2 +- src/earthkit/data/xr_engine/builder.py | 11 +- src/earthkit/data/xr_engine/engine.py | 2 +- 4 files changed, 569 insertions(+), 3289 deletions(-) diff --git a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb index a14f9278b..8165ac2f9 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_aux_coords.ipynb @@ -33,23 +33,31 @@ }, { "cell_type": "markdown", - "id": "f557ebaf-0c93-4eab-8523-8e38b931c054", + "id": "f3117255-6cc1-4cf2-ba91-dc3134973b91", "metadata": {}, "source": [ - "### Basic examples" + "### Basic example" + ] + }, + { + "cell_type": "markdown", + "id": "e96e8da8-8219-4a79-92ad-515606816919", + "metadata": {}, + "source": [ + "First, we get some GRIB data containing control and perturbed forecasts." 
] }, { "cell_type": "code", "execution_count": 2, - "id": "fc6598cb-5516-424d-ba01-a8ee22a120cc", + "id": "a8f1d8b7-4a3b-4186-a827-17dbb16eaa2b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " " + " " ] }, { @@ -81,6 +89,7 @@ " vertical.level_type\n", " ensemble.member\n", " geography.grid_type\n", + " metadata.dataType\n", " \n", " \n", " \n", @@ -90,21 +99,23 @@ " 2024-06-03 00:00:00\n", " 2024-06-03\n", " 0 days 00:00:00\n", - " 700\n", + " 500\n", " pressure\n", " 0\n", " regular_ll\n", + " cf\n", " \n", " \n", " 1\n", - " r\n", - " 2024-06-03 00:00:00\n", + " t\n", + " 2024-06-03 06:00:00\n", " 2024-06-03\n", - " 0 days 00:00:00\n", - " 700\n", + " 0 days 06:00:00\n", + " 500\n", " pressure\n", " 0\n", " regular_ll\n", + " cf\n", " \n", " \n", " 2\n", @@ -114,19 +125,21 @@ " 0 days 00:00:00\n", " 500\n", " pressure\n", - " 0\n", + " 1\n", " regular_ll\n", + " pf\n", " \n", " \n", " 3\n", - " r\n", + " t\n", " 2024-06-03 00:00:00\n", " 2024-06-03\n", " 0 days 00:00:00\n", " 500\n", " pressure\n", - " 0\n", + " 2\n", " regular_ll\n", + " pf\n", " \n", " \n", " 4\n", @@ -134,10 +147,23 @@ " 2024-06-03 06:00:00\n", " 2024-06-03\n", " 0 days 06:00:00\n", - " 700\n", + " 500\n", " pressure\n", - " 0\n", + " 1\n", + " regular_ll\n", + " pf\n", + " \n", + " \n", + " 5\n", + " t\n", + " 2024-06-03 06:00:00\n", + " 2024-06-03\n", + " 0 days 06:00:00\n", + " 500\n", + " pressure\n", + " 2\n", " regular_ll\n", + " pf\n", " \n", " \n", "\n", @@ -146,17 +172,27 @@ "text/plain": [ " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", "0 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", - "1 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "1 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", "2 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", - "3 r 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", + "3 t 2024-06-03 00:00:00 2024-06-03 0 days 00:00:00 \n", "4 t 2024-06-03 06:00:00 2024-06-03 
0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 700 pressure 0 regular_ll \n", - "1 700 pressure 0 regular_ll \n", - "2 500 pressure 0 regular_ll \n", - "3 500 pressure 0 regular_ll \n", - "4 700 pressure 0 regular_ll " + "5 t 2024-06-03 06:00:00 2024-06-03 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 500 pressure 0 regular_ll \n", + "1 500 pressure 0 regular_ll \n", + "2 500 pressure 1 regular_ll \n", + "3 500 pressure 2 regular_ll \n", + "4 500 pressure 1 regular_ll \n", + "5 500 pressure 2 regular_ll \n", + "\n", + " metadata.dataType \n", + "0 cf \n", + "1 cf \n", + "2 pf \n", + "3 pf \n", + "4 pf \n", + "5 pf " ] }, "execution_count": 2, @@ -165,8 +201,16 @@ } ], "source": [ - "ds_fl = ekd.from_source(\"sample\", \"pl.grib\").to_fieldlist()\n", - "ds_fl.ls().head()" + "ds_fl = ekd.from_source(\"sample\", \"ens_cf_pf.grib\").to_fieldlist()\n", + "ds_fl.ls(extra_keys=[\"metadata.dataType\"])" + ] + }, + { + "cell_type": "markdown", + "id": "db15e80f-4beb-441d-b334-9fc1a300d1af", + "metadata": {}, + "source": [ + "Using the Xarray engine keyword `aux_coords` one can declare an auxiliary coordinate `\"forecast_type\"` whose values are derived from the GRIB metadata key `\"dataType\"`and depend on a single dimension `\"member\"`." ] }, { @@ -720,122 +764,74 @@ " filter: drop-shadow(1px 1px 5px var(--xr-font-color2));\n", " stroke-width: 0.8px;\n", "}\n", - "
<xarray.Dataset> Size: 176kB\n",
-       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
-       "                              latitude: 19, longitude: 36)\n",
+       "
<xarray.Dataset> Size: 33kB\n",
+       "Dimensions:        (member: 3, step: 2, latitude: 19, longitude: 36)\n",
        "Coordinates:\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
-       "    expver                   (forecast_reference_time) <U4 64B '0001' ... '0001'\n",
-       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
-       "  * level                    (level) int64 16B 500 700\n",
-       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
-       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "  * member         (member) <U1 12B '0' '1' '2'\n",
+       "    forecast_type  (member) <U2 24B 'cf' 'pf' 'pf'\n",
+       "  * step           (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * latitude       (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
+       "  * longitude      (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
        "Data variables:\n",
-       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
-       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t              (member, step, latitude, longitude) float64 33kB 250.2 ......\n",
        "Attributes:\n",
        "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", - " latitude: 19, longitude: 36)\n", + " Size: 33kB\n", + "Dimensions: (member: 3, step: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", - " expver (forecast_reference_time) \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
    -       "                              latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    -       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
    -       "  * level                    (level) int64 16B 500 700\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", - " latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", - " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " centre_and_expver (forecast_reference_time, step) \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (variable: 2, forecast_reference_time: 4, step: 2,\n",
    -       "                              level: 2, latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * variable                 (variable) <U1 8B 'r' 't'\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    -       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "    centre_and_expver        (forecast_reference_time, step) <U9 288B 'ecmf_0...\n",
    -       "  * level                    (level) int64 16B 500 700\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    data                     (variable, forecast_reference_time, step, level, latitude, longitude) float64 175kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 176kB\n", - "Dimensions: (variable: 2, forecast_reference_time: 4, step: 2,\n", - " level: 2, latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * variable (variable) \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
    02tp2025120900-168pd1:313
    12tp2025120900-168pd1:515
    22tp2025120900-168pd1:10110
    32tp2025120900-168pd2:323
    42tp2025120900-168pd2:525
    52tp2025120900-168pd2:10210
    62tp2025120900-168pd3:333
    72tp2025120900-168pd3:535
    82tp2025120900-168pd3:10310
    92tp2025120900-168pd4:545
    102tp2025120900-168pd4:10410
    112tp2025120900-168pd5:555
    122tp2025120900-168pd5:10510
    132tp2025120900-168pd6:10610
    142tp2025120900-168pd7:10710
    152tp2025120900-168pd8:10810
    162tp2025120900-168pd9:10910
    172tp2025120900-168pd10:101010
    \n", - "" - ], - "text/plain": [ - " metadata.shortName metadata.dataDate metadata.dataTime \\\n", - "0 2tp 20251209 0 \n", - "1 2tp 20251209 0 \n", - "2 2tp 20251209 0 \n", - "3 2tp 20251209 0 \n", - "4 2tp 20251209 0 \n", - "5 2tp 20251209 0 \n", - "6 2tp 20251209 0 \n", - "7 2tp 20251209 0 \n", - "8 2tp 20251209 0 \n", - "9 2tp 20251209 0 \n", - "10 2tp 20251209 0 \n", - "11 2tp 20251209 0 \n", - "12 2tp 20251209 0 \n", - "13 2tp 20251209 0 \n", - "14 2tp 20251209 0 \n", - "15 2tp 20251209 0 \n", - "16 2tp 20251209 0 \n", - "17 2tp 20251209 0 \n", - "\n", - " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", - "0 0-168 pd 1:3 1 \n", - "1 0-168 pd 1:5 1 \n", - "2 0-168 pd 1:10 1 \n", - "3 0-168 pd 2:3 2 \n", - "4 0-168 pd 2:5 2 \n", - "5 0-168 pd 2:10 2 \n", - "6 0-168 pd 3:3 3 \n", - "7 0-168 pd 3:5 3 \n", - "8 0-168 pd 3:10 3 \n", - "9 0-168 pd 4:5 4 \n", - "10 0-168 pd 4:10 4 \n", - "11 0-168 pd 5:5 5 \n", - "12 0-168 pd 5:10 5 \n", - "13 0-168 pd 6:10 6 \n", - "14 0-168 pd 7:10 7 \n", - "15 0-168 pd 8:10 8 \n", - "16 0-168 pd 9:10 9 \n", - "17 0-168 pd 10:10 10 \n", - "\n", - " metadata.numberOfForecastsInEnsemble \n", - "0 3 \n", - "1 5 \n", - "2 10 \n", - "3 3 \n", - "4 5 \n", - "5 10 \n", - "6 3 \n", - "7 5 \n", - "8 10 \n", - "9 5 \n", - "10 10 \n", - "11 5 \n", - "12 10 \n", - "13 10 \n", - "14 10 \n", - "15 10 \n", - "16 10 \n", - "17 10 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl2.ls(\n", - " keys=[\n", - " \"metadata.shortName\",\n", - " \"metadata.dataDate\",\n", - " \"metadata.dataTime\",\n", - " \"metadata.stepRange\",\n", - " \"metadata.dataType\",\n", - " \"metadata.quantile\",\n", - " \"metadata.number\",\n", - " \"metadata.numberOfForecastsInEnsemble\",\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", - "metadata": {}, - "source": [ - "Note that, in this context, the usual meaning 
of the metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", - "\n", - "For this reason, we must:\n", - "- declare ``\"quantile\"`` as an extra dimension, and\n", - "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", - "\n", - "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` is auxiliary coordinates." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 13kB\n",
    -       "Dimensions:                  (quantile: 18, forecast_reference_time: 1,\n",
    -       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
    -       "                              longitude: 12)\n",
    -       "Coordinates:\n",
    -       "  * quantile                 (quantile) <U5 360B '10:10' '1:10' ... '9:10'\n",
    -       "    quantile_rank            (quantile) <U2 144B '10' '1' '1' ... '7' '8' '9'\n",
    -       "    nquantiles               (quantile) int64 144B 10 10 3 5 10 ... 10 10 10 10\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 8B 2025...\n",
    -       "  * step                     (step) timedelta64[ns] 8B 7 days\n",
    -       "  * level                    (level) int64 8B 0\n",
    -       "  * level_type               (level_type) <U7 28B 'surface'\n",
    -       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
    -       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
    -       "Data variables:\n",
    -       "    2tp                      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 13kB\n", - "Dimensions: (quantile: 18, forecast_reference_time: 1,\n", - " step: 1, level: 1, level_type: 1, latitude: 7,\n", - " longitude: 12)\n", - "Coordinates:\n", - " * quantile (quantile) \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
    0avg_2t2010-07-012010-06-0130 days2height_above_ground_level0regular_ll
    1avg_2t2010-08-012010-07-0131 days2height_above_ground_level0regular_ll
    \n", - "" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 avg_2t 2010-07-01 2010-06-01 30 days \n", - "1 avg_2t 2010-08-01 2010-07-01 31 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member \\\n", - "0 2 height_above_ground_level 0 \n", - "1 2 height_above_ground_level 0 \n", - "\n", - " geography.grid_type \n", - "0 regular_ll \n", - "1 regular_ll " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl3 = ekd.from_source(\"file\", \"/Users/ecm8620/data/issue-948-avg_2t-2months.grib2\").to_fieldlist()\n", - "ds_fl3.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "fdcac0e0-f501-4cd5-bf2b-59c9d760d8e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 22kB\n",
    -       "Dimensions:                  (forecast_reference_time: 2, latitude: 33,\n",
    -       "                              longitude: 41)\n",
    -       "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 201...\n",
    -       "    step                     (forecast_reference_time) timedelta64[ns] 16B 30...\n",
    -       "  * latitude                 (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n",
    -       "  * longitude                (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n",
    -       "Data variables:\n",
    -       "    avg_2t                   (forecast_reference_time, latitude, longitude) float64 22kB ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + "
    metadata.shortNamemetadata.dataDatemetadata.dataTimemetadata.stepRangemetadata.dataTypemetadata.quantilemetadata.numbermetadata.numberOfForecastsInEnsemble
    02tp2025120900-168pd1:313
    12tp2025120900-168pd1:515
    22tp2025120900-168pd1:10110
    32tp2025120900-168pd2:323
    42tp2025120900-168pd2:525
    52tp2025120900-168pd2:10210
    62tp2025120900-168pd3:333
    72tp2025120900-168pd3:535
    82tp2025120900-168pd3:10310
    92tp2025120900-168pd4:545
    102tp2025120900-168pd4:10410
    112tp2025120900-168pd5:555
    122tp2025120900-168pd5:10510
    132tp2025120900-168pd6:10610
    142tp2025120900-168pd7:10710
    152tp2025120900-168pd8:10810
    162tp2025120900-168pd9:10910
    172tp2025120900-168pd10:101010
    \n", + "" ], "text/plain": [ - " Size: 22kB\n", - "Dimensions: (forecast_reference_time: 2, latitude: 33,\n", - " longitude: 41)\n", - "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 201...\n", - " step (forecast_reference_time) timedelta64[ns] 16B 30...\n", - " * latitude (latitude) float64 264B 55.0 54.75 ... 47.25 47.0\n", - " * longitude (longitude) float64 328B 5.0 5.25 ... 14.75 15.0\n", - "Data variables:\n", - " avg_2t (forecast_reference_time, latitude, longitude) float64 22kB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" + " metadata.shortName metadata.dataDate metadata.dataTime \\\n", + "0 2tp 20251209 0 \n", + "1 2tp 20251209 0 \n", + "2 2tp 20251209 0 \n", + "3 2tp 20251209 0 \n", + "4 2tp 20251209 0 \n", + "5 2tp 20251209 0 \n", + "6 2tp 20251209 0 \n", + "7 2tp 20251209 0 \n", + "8 2tp 20251209 0 \n", + "9 2tp 20251209 0 \n", + "10 2tp 20251209 0 \n", + "11 2tp 20251209 0 \n", + "12 2tp 20251209 0 \n", + "13 2tp 20251209 0 \n", + "14 2tp 20251209 0 \n", + "15 2tp 20251209 0 \n", + "16 2tp 20251209 0 \n", + "17 2tp 20251209 0 \n", + "\n", + " metadata.stepRange metadata.dataType metadata.quantile metadata.number \\\n", + "0 0-168 pd 1:3 1 \n", + "1 0-168 pd 1:5 1 \n", + "2 0-168 pd 1:10 1 \n", + "3 0-168 pd 2:3 2 \n", + "4 0-168 pd 2:5 2 \n", + "5 0-168 pd 2:10 2 \n", + "6 0-168 pd 3:3 3 \n", + "7 0-168 pd 3:5 3 \n", + "8 0-168 pd 3:10 3 \n", + "9 0-168 pd 4:5 4 \n", + "10 0-168 pd 4:10 4 \n", + "11 0-168 pd 5:5 5 \n", + "12 0-168 pd 5:10 5 \n", + "13 0-168 pd 6:10 6 \n", + "14 0-168 pd 7:10 7 \n", + "15 0-168 pd 8:10 8 \n", + "16 0-168 pd 9:10 9 \n", + "17 0-168 pd 10:10 10 \n", + "\n", + " metadata.numberOfForecastsInEnsemble \n", + "0 3 \n", + "1 5 \n", + "2 10 \n", + "3 3 \n", + "4 5 \n", + "5 10 \n", + "6 3 \n", + "7 5 \n", + "8 10 \n", + "9 5 \n", + "10 10 \n", + "11 5 \n", + "12 10 \n", + "13 10 \n", + "14 10 \n", + "15 10 \n", + "16 10 \n", + "17 10 " ] }, - 
"execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds5 = ds_fl3.to_xarray(\n", - " drop_dims=\"step\",\n", - " aux_coords={\"step\": (\"time.step\", (\"forecast_reference_time\",))},\n", - ")\n", - "ds5.load()" + "ds_fl2.ls(\n", + " keys=[\n", + " \"metadata.shortName\",\n", + " \"metadata.dataDate\",\n", + " \"metadata.dataTime\",\n", + " \"metadata.stepRange\",\n", + " \"metadata.dataType\",\n", + " \"metadata.quantile\",\n", + " \"metadata.number\",\n", + " \"metadata.numberOfForecastsInEnsemble\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3ea48ddf-3fc0-455b-b381-e3c8b2a3debe", + "metadata": {}, + "source": [ + "Note that, in this context, the usual meaning of the GRIB metadata key ``\"number\"`` (and the related ``\"numberOfForecastsInEnsemble\"``) is overridden by ``\"quantile\"``. As a result, the ensemble dimension normally derived from ``\"number\"`` is no longer applicable.\n", + "\n", + "For this reason, we must:\n", + "- declare the GRIB metadata key ``\"quantile\"`` as an extra dimension, and\n", + "- remove the predefined ensemble dimension ``\"number\"``, since it would otherwise conflict with the ``\"quantile\"`` dimension.\n", + "\n", + "Still, it might be useful to keep the information carried by ``\"number\"`` and ``\"numberOfForecastsInEnsemble\"`` as auxiliary coordinates." ] }, { "cell_type": "code", - "execution_count": 30, - "id": "aebe01e5-0a1f-4871-82d8-9baa2fb4272d", + "execution_count": 6, + "id": "cd65d5ce-b511-4c12-88f7-f64f5b0c18e7", "metadata": {}, "outputs": [ { @@ -4512,88 +1778,101 @@ " filter: drop-shadow(1px 1px 5px var(--xr-font-color2));\n", " stroke-width: 0.8px;\n", "}\n", - "
    <xarray.Dataset> Size: 22kB\n",
    -       "Dimensions:     (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n",
    +       "
    <xarray.Dataset> Size: 13kB\n",
    +       "Dimensions:        (quantile: 18, latitude: 7, longitude: 12)\n",
            "Coordinates:\n",
    -       "  * s           (s) <U4 16B '0001'\n",
    -       "  * valid_time  (valid_time) datetime64[ns] 16B 2010-07-01 2010-08-01\n",
    -       "  * latitude    (latitude) float64 264B 55.0 54.75 54.5 ... 47.5 47.25 47.0\n",
    -       "  * longitude   (longitude) float64 328B 5.0 5.25 5.5 5.75 ... 14.5 14.75 15.0\n",
    +       "  * quantile       (quantile) <U5 360B '10:10' '1:10' '1:3' ... '8:10' '9:10'\n",
    +       "    quantile_rank  (quantile) <U2 144B '10' '1' '1' '1' '2' ... '6' '7' '8' '9'\n",
    +       "    nquantiles     (quantile) int64 144B 10 10 3 5 10 3 5 ... 5 10 5 10 10 10 10\n",
    +       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
            "Data variables:\n",
    -       "    avg_2t      (s, valid_time, latitude, longitude) float64 22kB 284.2 ... 2...\n",
    +       "    2tp            (quantile, latitude, longitude) float64 12kB 13.37 ... 0.0\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " institution: ECMWF
    " ], "text/plain": [ - " Size: 22kB\n", - "Dimensions: (s: 1, valid_time: 2, latitude: 33, longitude: 41)\n", + " Size: 13kB\n", + "Dimensions: (quantile: 18, latitude: 7, longitude: 12)\n", "Coordinates:\n", - " * s (s) Date: Thu, 23 Apr 2026 12:12:55 +0200 Subject: [PATCH 5/5] Docstrings on `aux_coord` improved --- src/earthkit/data/indexing/xarray.py | 2 +- src/earthkit/data/xr_engine/engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/earthkit/data/indexing/xarray.py b/src/earthkit/data/indexing/xarray.py index 85d0c07a8..32b46ea15 100644 --- a/src/earthkit/data/indexing/xarray.py +++ b/src/earthkit/data/indexing/xarray.py @@ -255,7 +255,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs overwrites it. * aux_coords: dict, None Mapping from an auxiliary coordinate label to a tuple: - (metadata key, the dataset dimension(s)). The default value is None. + ``(metadata_key: str, dataset_dimension(s): str or iterable of str)``. The default value is None. * add_geo_coords: bool, None Add geographic coordinates to the dataset when field values are represented by a single "values" dimension. Its default value (None) expands diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index b95f7f613..97d8e3974 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -259,7 +259,7 @@ def open_dataset( overwrites it. aux_coords: dict, None Mapping from an auxiliary coordinate label to a tuple: - (metadata key, the dataset dimension(s)). The default value is None. + ``(metadata_key: str, dataset_dimension(s): str or iterable of str)``. The default value is None. add_geo_coords: bool, None If True, add geographic coordinates to the dataset when field values are represented by a single "values" dimension. Its default value (None) expands