diff --git a/docs/examples/grib_to_file_target.ipynb b/docs/examples/grib_to_file_target.ipynb index c37ffa05f..97bcf1dad 100644 --- a/docs/examples/grib_to_file_target.ipynb +++ b/docs/examples/grib_to_file_target.ipynb @@ -442,9 +442,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -456,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/index.rst b/docs/examples/index.rst index f9f5af4a9..c945dddd4 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -158,12 +158,15 @@ Xarray engine xarray_engine_overview.ipynb xarray_engine_temporal.ipynb + xarray_engine_step_ranges.ipynb + xarray_engine_seasonal.ipynb xarray_engine_level.ipynb + xarray_engine_ensemble.ipynb xarray_engine_variable_key.ipynb xarray_engine_field_dims.ipynb xarray_engine_to_grib.ipynb xarray_engine_split.ipynb - xarray_engine_seasonal.ipynb + xarray_engine_squeeze.ipynb xarray_engine_chunks.ipynb Targets and encoders diff --git a/docs/examples/list_of_dicts_to_xarray.ipynb b/docs/examples/list_of_dicts_to_xarray.ipynb index 955d60420..322f3097c 100644 --- a/docs/examples/list_of_dicts_to_xarray.ipynb +++ b/docs/examples/list_of_dicts_to_xarray.ipynb @@ -1,968 +1,968 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "ee0f0104-8077-45f1-9746-58f29b64db92", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "ee0f0104-8077-45f1-9746-58f29b64db92", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## list-of-dict: converting to Xarray" + ] + }, + { + "cell_type": "raw", + "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", + "metadata": { + "editable": true, + "raw_mimetype": 
"text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." + ] + }, + { + "cell_type": "markdown", + "id": "2e087423-8c96-49b4-984c-f15472fa8381", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data containing geography" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 248B\n",
+                            "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
+                            "Coordinates:\n",
+                            "  * levelist   (levelist) int64 16B 500 850\n",
+                            "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
+                            "  * longitude  (longitude) float64 16B 20.0 40.0\n",
+                            "Data variables:\n",
+                            "    t          (levelist, latitude, longitude) float64 96B ...\n",
+                            "    u          (levelist, latitude, longitude) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 248B\n", + "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", + " * longitude (longitude) float64 16B 20.0 40.0\n", + "Data variables:\n", + " t (levelist, latitude, longitude) float64 96B ...\n", + " u (levelist, latitude, longitude) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "\n", + "prototype = {\n", + " \"latitudes\": [10.0, 0.0, -10.0],\n", + " \"longitudes\": [20, 40.0],\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data without geography" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+                            "Dimensions:   (levelist: 2, values: 6)\n",
+                            "Coordinates:\n",
+                            "  * levelist  (levelist) int64 16B 500 850\n",
+                            "Dimensions without coordinates: values\n",
+                            "Data variables:\n",
+                            "    t         (levelist, values) float64 96B ...\n",
+                            "    u         (levelist, values) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (levelist: 2, values: 6)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + "Dimensions without coordinates: values\n", + "Data variables:\n", + " t (levelist, values) float64 96B ...\n", + " u (levelist, values) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prototype = {\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6463409-7686-4d90-8cab-00a04b7119bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## list-of-dict: converting to Xarray" - ] - }, - { - "cell_type": "raw", - "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", - "metadata": { - "editable": true, - "raw_mimetype": "text/x-rst", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." 
- ] - }, - { - "cell_type": "markdown", - "id": "2e087423-8c96-49b4-984c-f15472fa8381", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data containing geography" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 248B\n",
-       "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
-       "Coordinates:\n",
-       "  * levelist   (levelist) int64 16B 500 850\n",
-       "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
-       "  * longitude  (longitude) float64 16B 20.0 40.0\n",
-       "Data variables:\n",
-       "    t          (levelist, latitude, longitude) float64 96B ...\n",
-       "    u          (levelist, latitude, longitude) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 248B\n", - "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", - " * longitude (longitude) float64 16B 20.0 40.0\n", - "Data variables:\n", - " t (levelist, latitude, longitude) float64 96B ...\n", - " u (levelist, latitude, longitude) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import earthkit.data as ekd\n", - "\n", - "prototype = {\n", - " \"latitudes\": [10.0, 0.0, -10.0],\n", - " \"longitudes\": [20, 40.0],\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "markdown", - "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data without geography" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 208B\n",
-       "Dimensions:   (levelist: 2, values: 6)\n",
-       "Coordinates:\n",
-       "  * levelist  (levelist) int64 16B 500 850\n",
-       "Dimensions without coordinates: values\n",
-       "Data variables:\n",
-       "    t         (levelist, values) float64 96B ...\n",
-       "    u         (levelist, values) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 208B\n", - "Dimensions: (levelist: 2, values: 6)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - "Dimensions without coordinates: values\n", - "Data variables:\n", - " t (levelist, values) float64 96B ...\n", - " u (levelist, values) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "prototype = {\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6463409-7686-4d90-8cab-00a04b7119bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/xarray_engine_chunks.ipynb b/docs/examples/xarray_engine_chunks.ipynb index 39eb8f6f9..37b9993f4 100644 --- a/docs/examples/xarray_engine_chunks.ipynb +++ b/docs/examples/xarray_engine_chunks.ipynb @@ -19,7 +19,7 @@ "id": "b42eccf8-abcc-44a1-8406-f8aa966b1bf5", "metadata": { "editable": true, - "raw_mimetype": "text/x-rst", 
+ "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, @@ -58,7 +58,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "98299fdfafa74aa5b8cbc0f95188b8d5", + "model_id": "5bd1390ded1949169dbeeaf1f017ad75", "version_major": 2, "version_minor": 0 }, @@ -494,7 +494,7 @@ "Attributes:\n", " standard_name: air_temperature\n", " long_name: 2 metre temperature\n", - " units: K" + " dtype='float64', name='longitude'))
  • standard_name :
    air_temperature
    long_name :
    2 metre temperature
    units :
    K
  • " ], "text/plain": [ " Size: 2MB\n", @@ -667,7 +667,7 @@ "tags": [] }, "source": [ - "We compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." + "Finally, we compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." ] }, { @@ -1099,7 +1099,7 @@ " 227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n", "Coordinates:\n", " * latitude (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n", - " * longitude (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0
  • " ], "text/plain": [ " Size: 2kB\n", diff --git a/docs/examples/xarray_engine_ensemble.ipynb b/docs/examples/xarray_engine_ensemble.ipynb new file mode 100644 index 000000000..c2c296da9 --- /dev/null +++ b/docs/examples/xarray_engine_ensemble.ipynb @@ -0,0 +1,1181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9ea9a922-c03f-43c1-aa83-b4ce321b75f5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: ensemble data" + ] + }, + { + "cell_type": "markdown", + "id": "ef0e9584-da7b-4461-804e-5785e494485e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Get input GRIB ensemble forecast data." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ecc9eac2-21fa-47a1-b1a4-37a26c980800", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fe83d176cacd410da62e44b2ac16e0b2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "ens_cf_pf.grib: 0%| | 0.00/7.03k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
    0ecmftisobaricInhPa5002024060300cf0regular_ll
    1ecmftisobaricInhPa5002024060306cf0regular_ll
    2ecmftisobaricInhPa5002024060300pf1regular_ll
    3ecmftisobaricInhPa5002024060300pf2regular_ll
    4ecmftisobaricInhPa5002024060306pf1regular_ll
    5ecmftisobaricInhPa5002024060306pf2regular_ll
    \n", + "" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", + "0 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "1 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "2 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "3 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "4 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "5 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "\n", + " dataType number gridType \n", + "0 cf 0 regular_ll \n", + "1 cf 0 regular_ll \n", + "2 pf 1 regular_ll \n", + "3 pf 2 regular_ll \n", + "4 pf 1 regular_ll \n", + "5 pf 2 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.ls()" + ] + }, + { + "cell_type": "raw", + "id": "48c853ab-abec-4475-818f-f64d8d3d01d8", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the ensemble dimension is defined by the \"number\" :ref:`dimension role `. By default, this role is using the \"number\" metadata key." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7ae667a2-8e6b-4c6d-9cfe-b6bde40b2971", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 33kB\n",
    +       "Dimensions:    (number: 3, step: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number     (number) int64 24B 0 1 2\n",
    +       "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
    +       "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    t          (number, step, latitude, longitude) float64 33kB ...\n",
    +       "Attributes: (12/13)\n",
    +       "    param:        t\n",
    +       "    paramId:      130\n",
    +       "    class:        od\n",
    +       "    stream:       enfo\n",
    +       "    levtype:      pl\n",
    +       "    type:         cf\n",
    +       "    ...           ...\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    levelist:     500\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 33kB\n", + "Dimensions: (number: 3, step: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", + "Data variables:\n", + " t (number, step, latitude, longitude) float64 33kB ...\n", + "Attributes: (12/13)\n", + " param: t\n", + " paramId: 130\n", + " class: od\n", + " stream: enfo\n", + " levtype: pl\n", + " type: cf\n", + " ... ...\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " levelist: 500\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "6f2a0d29-10bf-45ad-abf5-e457fd6f820d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This default behaviour can be overridden by specifying custom ``dim_roles``. E.g. to get the ensemble member number from the \"perturbatioNumber\" key we can use:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ba4a3746-3549-494b-8272-df0b03d6936c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 33kB\n",
    +       "Dimensions:    (number: 3, step: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number     (number) int64 24B 0 1 2\n",
    +       "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
    +       "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    t          (number, step, latitude, longitude) float64 33kB ...\n",
    +       "Attributes: (12/14)\n",
    +       "    param:        t\n",
    +       "    paramId:      130\n",
    +       "    class:        od\n",
    +       "    stream:       enfo\n",
    +       "    levtype:      pl\n",
    +       "    type:         cf\n",
    +       "    ...           ...\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    number:       0\n",
    +       "    levelist:     500\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 33kB\n", + "Dimensions: (number: 3, step: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", + "Data variables:\n", + " t (number, step, latitude, longitude) float64 33kB ...\n", + "Attributes: (12/14)\n", + " param: t\n", + " paramId: 130\n", + " class: od\n", + " stream: enfo\n", + " levtype: pl\n", + " type: cf\n", + " ... ...\n", + " time: 0\n", + " domain: g\n", + " number: 0\n", + " levelist: 500\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray(dim_roles={\"number\": \"perturbationNumber\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "596c4426-32d7-4766-a640-4436da604918", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/xarray_engine_field_dims.ipynb b/docs/examples/xarray_engine_field_dims.ipynb index 082335403..36bb29093 100644 --- a/docs/examples/xarray_engine_field_dims.ipynb +++ b/docs/examples/xarray_engine_field_dims.ipynb @@ -75,12 +75,26 @@ "outputs": [ { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9d15f57ec3eb42f19a24d4fd6774fb79", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - 
"(Frozen({'forecast_reference_time': 4, 'step': 2, 'levelist': 2, 'latitude': 19, 'longitude': 36}),\n", + "pl.grib: 0%| | 0.00/48.8k [00:00` or :ref:`\"grib\" ` we have the following mappings:\n", + "We can convert a GRIB fieldlist to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. This notebook discusses the **level** options used by this method.\n", "\n", - "- level role: \"level\"\n", - "- level_type role: \"typeOfLevel\"\n", + "The level dimension is based on the ``dim_roles`` and ``level_dim_mode`` options. The ``dim_roles`` are a mapping between predefined dimension roles and metadata keys used to build the given dimensions. With regards to the levels the \"level\" and \"level_type\" roles are the ones we need to consider. When ``profile`` is :ref:`None ` or :ref:`\"grib\" ` we have the following mappings:\n", + " \n", + " - level role: \"level\"\n", + " - level_type role: \"typeOfLevel\"\n", "\n", "When ``profile`` is :ref:`\"mars\" ` the roles are defined as follows:\n", - "\n", - "- level role: \"levelist\"\n", - "- level_type role: \"levtype\"\n" + " \n", + " - level role: \"levelist\"\n", + " - level_type role: \"levtype\"" ] }, { @@ -56,14 +58,16 @@ "id": "a477dd3d-e98f-43f2-99f6-628c2bd8cd6f", "metadata": { "editable": true, - "raw_mimetype": "text/restructuredtext", + "raw_mimetype": "", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ - "When ``level_dim_mode=\"level\"`` the level role defines the level dimension. Since the default :ref:`profile ` is :ref:`\"mars\" ` in the example below the level dimension will be derived from the \"levelist\" key." + "When ``level_dim_mode=\"level\"`` the level role defines the level dimension. Since the default :ref:`profile ` is :ref:`\"mars\" ` in the example below the level dimension will be derived from the \"levelist\" key. \n", + "\n", + "By default, the dimensions related to dimension roles are named after the roles. 
So, although the level dimension was generated from the \"levelist\" GRIB key the dimension name is still \"level\". " ] }, { @@ -78,6 +82,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "59d6167a6800496ab715b55fd46c41e0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00 span {\n", @@ -286,15 +311,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -444,18 +469,18 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 500 700\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -467,34 +492,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * levelist (levelist) int64 500 700\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -581,13 +606,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -632,7 +658,7 @@ ".xr-sections {\n", " padding-left: 0 
!important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -640,7 +666,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -652,6 +680,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -673,7 +705,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -684,7 +716,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -756,15 +788,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -914,48 +946,48 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 176kB\n",
            "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * level                    (level) int64 500 700\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 ...\n",
    -       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 176kB\n", "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * level (level) int64 500 700\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, level, latitude, longitude) float64 ...\n", - " t (forecast_reference_time, step, level, latitude, longitude) float64 ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", " institution: ECMWF" @@ -1005,6 +1037,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "56b17d5515a14844859d8269f559d026", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_ml.grib: 0%| | 0.00/176k [00:00 span {\n", @@ -1213,15 +1266,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1371,18 +1424,18 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 351kB\n",
            "Dimensions:                  (forecast_reference_time: 4, step: 2,\n",
            "                              level_and_type: 4, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * level_and_type           (level_and_type) <U5 '137ml' '500pl' '700pl' '90ml'\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level_and_type           (level_and_type) <U5 80B '137ml' '500pl' ... '90ml'\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    t                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 ...\n",
    -       "    u                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 ...\n",
    +       "    t                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 175kB ...\n",
    +       "    u                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 175kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -1394,34 +1447,34 @@
            "    domain:       g\n",
            "    levelist:     137\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    ml
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    levelist :
    137
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 351kB\n", "Dimensions: (forecast_reference_time: 4, step: 2,\n", " level_and_type: 4, latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * level_and_type (level_and_type) span {\n", @@ -1697,15 +1771,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1855,64 +1929,64 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 1MB\n",
            "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
            "                              surface: 1, latitude: 19, longitude: 36,\n",
            "                              isobaricInhPa: 6)\n",
            "Coordinates:\n",
    -       "  * number                   (number) int64 0\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * surface                  (surface) int64 0\n",
    -       "  * isobaricInhPa            (isobaricInhPa) int64 300 400 500 700 850 1000\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * surface                  (surface) int64 8B 0\n",
    +       "  * isobaricInhPa            (isobaricInhPa) int64 48B 300 400 500 700 850 1000\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    2t                       (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n",
    -       "    msl                      (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n",
    -       "    r                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    t                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    u                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    v                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    z                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    +       "    2t                       (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n",
    +       "    msl                      (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n",
    +       "    r                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    u                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    v                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    z                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 1MB\n", "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", " surface: 1, latitude: 19, longitude: 36,\n", " isobaricInhPa: 6)\n", "Coordinates:\n", - " * number (number) int64 0\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * surface (surface) int64 0\n", - " * isobaricInhPa (isobaricInhPa) int64 300 400 500 700 850 1000\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * surface (surface) int64 8B 0\n", + " * isobaricInhPa (isobaricInhPa) int64 48B 300 400 500 700 850 1000\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 
340.0 350.0\n", "Data variables:\n", - " 2t (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n", - " msl (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n", - " r (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " t (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " u (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " v (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " z (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", + " 2t (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n", + " msl (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n", + " r (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " t (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " u (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " v (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " z (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", " institution: ECMWF" @@ -1946,9 +2020,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1960,7 +2034,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_overview.ipynb b/docs/examples/xarray_engine_overview.ipynb index 
0d1fe3615..0d256cdb5 100644 --- a/docs/examples/xarray_engine_overview.ipynb +++ b/docs/examples/xarray_engine_overview.ipynb @@ -57,7 +57,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "8b2a598b3f264e2aa75a0cddab1650d2", "version_major": 2, "version_minor": 0 }, @@ -115,13 +115,6 @@ "tags": [] }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. Use engine=`cfgrib` to invoke the cfgrib engine.\n" - ] - }, { "data": { "text/html": [ @@ -155,14 +148,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -217,6 +210,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -253,7 +247,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -264,7 +258,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -336,15 +330,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: 
\")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -495,17 +489,17 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -517,34 +511,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -820,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "5ad32a3e-2f48-49f5-b207-15e89c397fba", "metadata": { "editable": true, @@ -833,17 +827,18 @@ { "data": { "text/plain": [ - "255.25649845948692" + "(254.25649845948692, 255.25649845948692)" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds_fl.sel(param=\"t\", step=6, level=500)[0].values.mean(), \n", - "ds_fl1.sel(param=\"t\", step=6, level=500)[0].values.mean()" + "m_0 = ds_fl.sel(param=\"t\", step=6, level=500)[0].values.mean() \n", + "m_1 = ds_fl1.sel(param=\"t\", step=6, level=500)[0].values.mean()\n", + "m_0, m_1" ] }, { @@ -1189,9 +1184,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1203,7 +1198,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_seasonal.ipynb b/docs/examples/xarray_engine_seasonal.ipynb index 2c8c55528..52a0a0b1d 100644 --- a/docs/examples/xarray_engine_seasonal.ipynb +++ b/docs/examples/xarray_engine_seasonal.ipynb @@ -1,700 +1,1204 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "55f1f7bf-9589-4a43-b246-7c4c7880fa2d", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "55f1f7bf-9589-4a43-b246-7c4c7880fa2d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: seasonal forecast" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7e0be52c-bedb-4ae7-984c-4807bf253d7f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "653a95e071ca4633aadbe42f597676a9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "seasonal_monthly.grib: 0%| | 0.00/160k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridTypeforecastMonth
    0lfpw2tsurface0199310010744fcmean0regular_ll1
    1lfpw2tsurface0199310010744fcmean1regular_ll1
    2lfpw2tsurface0199310010744fcmean2regular_ll1
    3lfpw2tsurface01993100101464fcmean0regular_ll2
    \n", + "" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", + "0 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "1 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "2 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "3 lfpw 2t surface 0 19931001 0 1464 fcmean \n", + "\n", + " number gridType forecastMonth \n", + "0 0 regular_ll 1 \n", + "1 1 regular_ll 1 \n", + "2 2 regular_ll 1 \n", + "3 0 regular_ll 2 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl[0:4].ls(extra_keys=\"forecastMonth\")" + ] + }, + { + "cell_type": "raw", + "id": "665fba14-79d5-4344-84fb-2e16da77936d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "In order to use ``forecastMonth`` instead of ``step`` we need to use the ``dim_roles`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c500c39a-8cdf-4e25-950e-581924879e6c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 395kB\n",
    +                            "Dimensions:                  (number: 3, forecast_reference_time: 4, step: 6,\n",
    +                            "                              latitude: 19, longitude: 36)\n",
    +                            "Coordinates:\n",
    +                            "  * number                   (number) int64 24B 0 1 2\n",
    +                            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    +                            "  * step                     (step) int64 48B 1 2 3 4 5 6\n",
    +                            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +                            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +                            "Data variables:\n",
    +                            "    2t                       (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n",
    +                            "Attributes: (12/15)\n",
    +                            "    param:        2t\n",
    +                            "    paramId:      167\n",
    +                            "    class:        c3\n",
    +                            "    stream:       msmm\n",
    +                            "    levtype:      sfc\n",
    +                            "    type:         fcmean\n",
    +                            "    ...           ...\n",
    +                            "    fcmonth:      1\n",
    +                            "    origin:       lfpw\n",
    +                            "    domain:       g\n",
    +                            "    method:       1\n",
    +                            "    Conventions:  CF-1.8\n",
    +                            "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 395kB\n", + "Dimensions: (number: 3, forecast_reference_time: 4, step: 6,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " 2t (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n", + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "e917dbc1-ba05-4180-b1d8-62e04bf98d50", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we check the \"step\" dimension we can see its units are \"months\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "850836de-db60-48ac-b42b-253ab335ceef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Size: 48B\n", + "array([1, 2, 3, 4, 5, 6])\n", + "Coordinates:\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + "Attributes:\n", + " units: months\n" + ] + } + ], + "source": [ + "print(ds[\"step\"])" + ] + }, + { + "cell_type": "raw", + "id": "13b0fdb1-ed1f-4a0a-b77f-71115adf40ad", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "By default, the dimensions related to dimension roles are named after the roles. So, although the step dimension was generated from the \"forecastMonth\" GRIB key the dimension name is still \"step\". To override this use the ``dim_name_from_role_name=False`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cbf6d822-546e-42ab-a8c7-ad8d7d0c61fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 395kB\n",
    +                            "Dimensions:                  (number: 3, forecast_reference_time: 4,\n",
    +                            "                              forecastMonth: 6, latitude: 19, longitude: 36)\n",
    +                            "Coordinates:\n",
    +                            "  * number                   (number) int64 24B 0 1 2\n",
    +                            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    +                            "  * forecastMonth            (forecastMonth) int64 48B 1 2 3 4 5 6\n",
    +                            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +                            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +                            "Data variables:\n",
    +                            "    2t                       (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n",
    +                            "Attributes: (12/15)\n",
    +                            "    param:        2t\n",
    +                            "    paramId:      167\n",
    +                            "    class:        c3\n",
    +                            "    stream:       msmm\n",
    +                            "    levtype:      sfc\n",
    +                            "    type:         fcmean\n",
    +                            "    ...           ...\n",
    +                            "    fcmonth:      1\n",
    +                            "    origin:       lfpw\n",
    +                            "    domain:       g\n",
    +                            "    method:       1\n",
    +                            "    Conventions:  CF-1.8\n",
    +                            "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 395kB\n", + "Dimensions: (number: 3, forecast_reference_time: 4,\n", + " forecastMonth: 6, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", + " * forecastMonth (forecastMonth) int64 48B 1 2 3 4 5 6\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"}, \n", + " dim_name_from_role_name=False)\n", + "ds" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## Xarray engine: seasonal forecast" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "7e0be52c-bedb-4ae7-984c-4807bf253d7f", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "import earthkit.data as ekd\n", - "\n", - "ds_fl = ekd.from_source(\"sample\", \"seasonal_monthly.grib\")" - ] - }, - { - 
"cell_type": "markdown", - "id": "918dec31-6135-458d-a243-7ccd3f5ca3ff", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "The input data contains seasonal monthly forecast. Because the length of a month varies, for this data the ``forecastMonth`` key is better suited for describing the temporal structure than using the ``step*`` keys. \n", - "\n", - "This is how the first few GRIB messages look like:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "78ebb588-85a8-4a67-8f6b-046a536a508a", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridTypeforecastMonth
    0lfpw2tsurface0199310010744fcmean0regular_ll1
    1lfpw2tsurface0199310010744fcmean1regular_ll1
    2lfpw2tsurface0199310010744fcmean2regular_ll1
    3lfpw2tsurface01993100101464fcmean0regular_ll2
    \n", - "
    " - ], - "text/plain": [ - " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", - "0 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "1 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "2 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "3 lfpw 2t surface 0 19931001 0 1464 fcmean \n", - "\n", - " number gridType forecastMonth \n", - "0 0 regular_ll 1 \n", - "1 1 regular_ll 1 \n", - "2 2 regular_ll 1 \n", - "3 0 regular_ll 2 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl[0:4].ls(extra_keys=\"forecastMonth\")" - ] - }, - { - "cell_type": "raw", - "id": "665fba14-79d5-4344-84fb-2e16da77936d", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "In order to use ``forecastMonth`` instead of ``step`` we need to use the ``dim_roles`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c500c39a-8cdf-4e25-950e-581924879e6c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. Use engine=`cfgrib` to invoke the cfgrib engine.\n" - ] - }, - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 395kB\n",
    -       "Dimensions:                  (number: 3, forecast_reference_time: 4,\n",
    -       "                              forecastMonth: 6, latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * number                   (number) int64 24B 0 1 2\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    -       "  * forecastMonth            (forecastMonth) int64 48B 1 2 3 4 5 6\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    2t                       (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n",
    -       "Attributes: (12/17)\n",
    -       "    param:          2t\n",
    -       "    standard_name:  unknown\n",
    -       "    long_name:      2 metre temperature\n",
    -       "    paramId:        167\n",
    -       "    class:          c3\n",
    -       "    stream:         msmm\n",
    -       "    ...             ...\n",
    -       "    fcmonth:        1\n",
    -       "    origin:         lfpw\n",
    -       "    domain:         g\n",
    -       "    method:         1\n",
    -       "    Conventions:    CF-1.8\n",
    -       "    institution:    ECMWF
    " - ], - "text/plain": [ - " Size: 395kB\n", - "Dimensions: (number: 3, forecast_reference_time: 4,\n", - " forecastMonth: 6, latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * number (number) int64 24B 0 1 2\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", - " * forecastMonth (forecastMonth) int64 48B 1 2 3 4 5 6\n", - " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", - "Data variables:\n", - " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", - "Attributes: (12/17)\n", - " param: 2t\n", - " standard_name: unknown\n", - " long_name: 2 metre temperature\n", - " paramId: 167\n", - " class: c3\n", - " stream: msmm\n", - " ... ...\n", - " fcmonth: 1\n", - " origin: lfpw\n", - " domain: g\n", - " method: 1\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", dim_roles={\"step\": \"forecastMonth\"})\n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6eba63a3-452b-4317-aa7d-7793e4a1dd2a", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev_ecc", - "language": "python", - "name": "dev_ecc" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/xarray_engine_split.ipynb b/docs/examples/xarray_engine_split.ipynb index 6c77736b0..787e734e3 100644 --- 
a/docs/examples/xarray_engine_split.ipynb +++ b/docs/examples/xarray_engine_split.ipynb @@ -44,7 +44,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8e0c9b139cf64468a0126a7780bf54e0", + "model_id": "6c861eb4640740fab0436a6893174a95", "version_major": 2, "version_minor": 0 }, @@ -59,7 +59,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dimension 'typeOfLevel' of variable 't' cannot have multiple values=['hybrid', 'isobaricInhPa']\n" + "Dimension 'level_type' of variable 't' cannot have multiple values=['hybrid', 'isobaricInhPa']\n" ] } ], @@ -84,7 +84,7 @@ "tags": [] }, "source": [ - "In this case we can use the ``split_dims`` option to split the hypercube along the problematic dimensions. The results a tuple of two lists: \n", + "In this case we can use the ``split_dims`` option to split the hypercube along the problematic dimensions. ``split_dims`` does not use dimension names but takes one or more GRIB keys to perform the splitting on. The result is a tuple of two lists: \n", "\n", "- the first list contains the Xarray datasets\n", "- the second list contains the corresponding dictionaries with the spitting keys/values (one dictionary per dataset)\n", @@ -527,20 +527,20 @@ " u (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", diff --git a/docs/examples/xarray_engine_squeeze.ipynb b/docs/examples/xarray_engine_squeeze.ipynb new file mode 100644 index 000000000..c32360896 --- /dev/null +++ b/docs/examples/xarray_engine_squeeze.ipynb @@ -0,0 +1,1497 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2feafcc-430b-4718-983f-554e55dcd54a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: squeezing dimensions" + ] + }, + { + "cell_type": "markdown", + "id": "f1b37637-7cce-4af5-8bad-1ddb6492d732", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "First, we get some GRIB forecast data on pressure levels and read it into a GRIB fieldlist." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fbbb4422431d4d75aad6e3a4bd7d20d4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
    +       "                              latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    levtype:      pl\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    number:       0\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " class: od\n", + " stream: oper\n", + " levtype: pl\n", + " type: fc\n", + " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " number: 0\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "5e22a7d6-8f86-454a-b7e7-fca4273cb493", + "metadata": {}, + "source": [ + "When using ``squeeze=False``, dimensions with a single value (here \"number\" and \"level_type\") are kept in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e19e154e-89ac-4d5c-a82c-bd6227bc94f6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
    +       "                              level: 2, level_type: 1, latitude: 19,\n",
    +       "                              longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * level_type               (level_type) <U2 8B 'pl'\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (number, forecast_reference_time, step, level, level_type, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, level, level_type, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", + " level: 2, level_type: 1, latitude: 19,\n", + " longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * level_type (level_type) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
    +       "                              level: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    levtype:      pl\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", + " level: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " r (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " class: od\n", + " stream: oper\n", + " levtype: pl\n", + " type: fc\n", + " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray(ensure_dims=[\"number\", \"level_type\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6937b30-029a-4b2a-8391-ed4f5ac8eeae", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/xarray_engine_step_ranges.ipynb b/docs/examples/xarray_engine_step_ranges.ipynb new file mode 100644 index 000000000..cef1aa2ba --- /dev/null +++ b/docs/examples/xarray_engine_step_ranges.ipynb @@ -0,0 +1,671 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "415a2c2a-8d00-48fc-9a02-6fc79aac663f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: step range" + ] + }, + { + "cell_type": "markdown", + "id": "b8e40382-ef81-46d7-8c94-2a01bd3a5214", + "metadata": {}, + "source": [ + "Get input GRIB2 data containing precipitation forecast for step ranges." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "85d04283-3488-477f-90dd-ee27f0a91935", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "936ac87a643d414781370a22f10a9904", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "lsp_step_range.grib2: 0%| | 0.00/1.17k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    paramstepstepRangestartStependStep
    0lsp71-7271-727172
    1lsp72-7372-737273
    \n", + "" + ], + "text/plain": [ + " param step stepRange startStep endStep\n", + "0 lsp 71-72 71-72 71 72\n", + "1 lsp 72-73 72-73 72 73" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "ds_fl = ekd.from_source(\"sample\", \"lsp_step_range.grib2\")\n", + "ds_fl.ls(keys=[\"param\", \"step\", \"stepRange\", \"startStep\", \"endStep\"])" + ] + }, + { + "cell_type": "raw", + "id": "b2fab96a-8435-4ed3-b43d-e7f5dcc27141", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the step dimension is defined by the \"step\" :ref:`dimension role `. By default, this role is using the \"step_timedelta\" generated metadata key that is the timedelta representation of the \"endStep\" GRIB key." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5b6872c9-97b6-4336-89ba-4e6491605f90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 2kB\n",
    +       "Dimensions:    (step: 2, latitude: 7, longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * step       (step) timedelta64[ns] 16B 3 days 3 days 01:00:00\n",
    +       "  * latitude   (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude  (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    lsp        (step, latitude, longitude) float64 1kB ...\n",
    +       "Attributes:\n",
    +       "    param:        lsp\n",
    +       "    paramId:      142\n",
    +       "    class:        d1\n",
    +       "    stream:       oper\n",
    +       "    levtype:      sfc\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20250527\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (step: 2, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * step (step) timedelta64[ns] 16B 3 days 3 days 01:00:00\n", + " * latitude (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n", + " * longitude (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n", + "Data variables:\n", + " lsp (step, latitude, longitude) float64 1kB ...\n", + "Attributes:\n", + " param: lsp\n", + " paramId: 142\n", + " class: d1\n", + " stream: oper\n", + " levtype: sfc\n", + " type: fc\n", + " expver: 0001\n", + " date: 20250527\n", + " time: 0\n", + " domain: g\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray()\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "5e0f85a6-30bd-4dfe-ae98-b9c13304a465", + "metadata": {}, + "source": [ + "We can check the \"step\" coordinate in the dataset to see that it matches the \"endStep\" values." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f9a4b868-29dd-4bb1-bbaa-18caa68f405e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[72, 73]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert to hours from ns\n", + "[int(x* 1E-9/(3600)) for x in ds[\"step\"].values]" + ] + }, + { + "cell_type": "markdown", + "id": "542a047c-39d8-4ec1-9194-bb362e9de4f7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This default behaviour can be overridden by specifying custom ``dim_roles``. E.g. 
to get the step from the \"startStep\" key we can use:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5ea06ff7-70c6-4967-80ce-7b7b6fa12fa5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[71, 72]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(dim_roles={\"step\": \"startStep\"})\n", + "[int(x* 1E-9/(3600)) for x in ds[\"step\"].values]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "495387d6-331c-4dc5-90fa-e05a6da9b998", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/xarray_engine_temporal.ipynb b/docs/examples/xarray_engine_temporal.ipynb index 1b1a76d57..b3c8589e7 100644 --- a/docs/examples/xarray_engine_temporal.ipynb +++ b/docs/examples/xarray_engine_temporal.ipynb @@ -54,7 +54,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8dca45f059a04a48898e26443d3b1a64", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00`)." ] }, { @@ -114,7 +138,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"raw\"`` the \"date\", \"time\" and \"step\" ecCodes GRIB keys are used to form the temporal dimensions." + "When ``time_dim_mode=\"raw\"`` the \"date\", \"time\" and \"step\" roles are used to form the temporal dimensions." 
] }, { @@ -162,14 +186,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -224,6 +248,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -260,7 +285,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -271,7 +296,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -343,15 +368,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -502,20 +527,18 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:    (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n",
    -       "                longitude: 36)\n",
    +       "Dimensions:    (date: 2, time: 2, step: 2, level: 2, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * date       (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n",
            "  * time       (time) timedelta64[ns] 16B 00:00:00 12:00:00\n",
            "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist   (levelist) int64 16B 500 700\n",
    +       "  * level      (level) int64 16B 500 700\n",
            "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r          (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t          (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r          (date, time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t          (date, time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -524,34 +547,32 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n", - " longitude: 36)\n", + "Dimensions: (date: 2, time: 2, step: 2, level: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (date, time, step, level, latitude, longitude) float64 88kB ...\n", + " t (date, time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -599,7 +620,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"forecast\"`` the \"date\" and \"time\" ecCodes GRIB keys are merged to form the dimension \"forecats_reference_time\". It also adds the \"step\" dimension based on the \"step\" key." + "When ``time_dim_mode=\"forecast\"`` the \"date\" and \"time\" roles are merged to form the dimension \"forecast_reference_time\". It also adds the \"step\" dimension."
] }, { @@ -647,14 +668,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -709,6 +730,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -745,7 +767,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -756,7 +778,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -828,15 +850,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -987,62 +1009,64 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
            "    type:         fc\n",
            "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", @@ -1085,7 +1109,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"valid_time\"`` the only temporal dimension is \"valid_time\". It is built from the values of the \"validityDate\" and \"validityTime\" ecCodes GRIB keys. This dimension can only be generated if each GRIB field has a distinct valid time, so it typically fits for analysis/climate data." + "When ``time_dim_mode=\"valid_time\"`` the only temporal dimension is \"valid_time\". By default, it is built from the values of the \"validityDate\" and \"validityTime\" ecCodes GRIB keys. This dimension can only be generated if each GRIB field has a distinct valid time, so it typically fits for analysis/climate data." 
] }, { @@ -1103,7 +1127,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "98fb8660778641739b5bdba816ad80ef", "version_major": 2, "version_minor": 0 }, @@ -1147,14 +1171,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -1209,6 +1233,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1245,7 +1270,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1256,7 +1281,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1328,15 +1353,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1494,27 +1519,30 @@ " * longitude (longitude) float64 96B -70.0 -60.0 -50.0 ... 
20.0 30.0 40.0\n", "Data variables:\n", " msl (valid_time, latitude, longitude) float64 5kB ...\n", - "Attributes:\n", + "Attributes: (12/13)\n", " param: msl\n", + " paramId: 151\n", " class: od\n", " stream: oper\n", " levtype: sfc\n", " type: an\n", - " expver: 0001\n", + " ... ...\n", + " date: 20160925\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • param :
    msl
    paramId :
    151
    class :
    od
    stream :
    oper
    levtype :
    sfc
    type :
    an
    expver :
    0001
    date :
    20160925
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 6kB\n", @@ -1525,13 +1553,16 @@ " * longitude (longitude) float64 96B -70.0 -60.0 -50.0 ... 20.0 30.0 40.0\n", "Data variables:\n", " msl (valid_time, latitude, longitude) float64 5kB ...\n", - "Attributes:\n", + "Attributes: (12/13)\n", " param: msl\n", + " paramId: 151\n", " class: od\n", " stream: oper\n", " levtype: sfc\n", " type: an\n", - " expver: 0001\n", + " ... ...\n", + " date: 20160925\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", @@ -1560,7 +1591,7 @@ "tags": [] }, "source": [ - "This mode can also be used for suitable forecasts. To use it for the original forecast data first we need to filter it." + "This mode can also be used for suitable forecast data. To use it for the original forecast data first we need to filter it." ] }, { @@ -1608,14 +1639,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -1670,6 +1701,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1706,7 +1738,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1717,7 +1749,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", 
"}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1789,15 +1821,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1948,17 +1980,16 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 44kB\n",
    -       "Dimensions:     (valid_time: 2, levelist: 2, latitude: 19, longitude: 36)\n",
    +       "Dimensions:     (valid_time: 2, level: 2, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * valid_time  (valid_time) datetime64[ns] 16B 2024-06-03 2024-06-03T06:00:00\n",
    -       "  * levelist    (levelist) int64 16B 500 700\n",
    +       "  * level       (level) int64 16B 500 700\n",
            "  * latitude    (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude   (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r           (valid_time, levelist, latitude, longitude) float64 22kB ...\n",
    -       "    t           (valid_time, levelist, latitude, longitude) float64 22kB ...\n",
    +       "    r           (valid_time, level, latitude, longitude) float64 22kB ...\n",
    +       "    t           (valid_time, level, latitude, longitude) float64 22kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -1969,31 +2000,30 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 44kB\n", - "Dimensions: (valid_time: 2, levelist: 2, latitude: 19, longitude: 36)\n", + "Dimensions: (valid_time: 2, level: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", " * valid_time (valid_time) datetime64[ns] 16B 2024-06-03 2024-06-03T06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (valid_time, levelist, latitude, longitude) float64 22kB ...\n", - " t (valid_time, levelist, latitude, longitude) float64 22kB ...\n", + " r (valid_time, level, latitude, longitude) float64 22kB ...\n", + " t (valid_time, level, latitude, longitude) float64 22kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -2042,7 +2072,7 @@ "tags": [] }, "source": [ - "When ``add_valid_time_dim=True`` it adds coord `valid_time` containing the valid times for all the different temporal dimensions as datetime64. When ``time_dim_mode=\"valid_time\"`` this coordinate is always added irrespectively of the value of ``add_valid_time_dim``." + "When ``add_valid_time_dim=True`` it adds the coordinate ``valid_time`` containing the valid times for all the different temporal dimensions as datetime64. When ``time_dim_mode=\"valid_time\"`` this coordinate is always added irrespective of the value of ``add_valid_time_dim``." 
] }, { @@ -2090,14 +2120,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -2152,6 +2182,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -2188,7 +2219,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -2199,7 +2230,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -2271,15 +2302,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2430,21 +2461,20 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:     (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n",
    +       "Dimensions:     (date: 2, time: 2, step: 2, level: 2, latitude: 19,\n",
            "                 longitude: 36)\n",
            "Coordinates:\n",
            "  * date        (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n",
            "  * time        (time) timedelta64[ns] 16B 00:00:00 12:00:00\n",
            "  * step        (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist    (levelist) int64 16B 500 700\n",
    +       "  * level       (level) int64 16B 500 700\n",
            "    valid_time  (date, time, step) datetime64[ns] 64B ...\n",
            "  * latitude    (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude   (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r           (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t           (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r           (date, time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t           (date, time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -2453,35 +2483,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n", + "Dimensions: (date: 2, time: 2, step: 2, level: 2, latitude: 19,\n", " longitude: 36)\n", "Coordinates:\n", " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " valid_time (date, time, step) datetime64[ns] 64B ...\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (date, time, step, level, latitude, longitude) float64 88kB ...\n", + " t (date, time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -2524,13 +2553,13 @@ "source": [ "When ``decode_times=True`` (the default) the follwing coordinates will be stored as datetime64:\n", "\n", - "- coordinates representing the date-like ecCodes keys (e.g. \"date\", \"validityDate\" etc.)\n", + "- coordinates representing the date-like roles or GRIB keys (e.g. \"date\", \"validityDate\" etc.)\n", "- datetime coordinates (e.g. \"forecast_reference_time\" etc.)\n", "\n", "When ``decode_timedelta=True`` (the default) the following coordinates will be stored as timedelta64:\n", "\n", - "- coordinates representing the time-like ecCodes keys (e.g. \"time\", \"validityTime\" etc.)\n", - "- duration-like coordinates (e.g. \"step\")" + "- coordinates representing the time-like roles or GRIB keys (e.g. \"time\", \"validityTime\" etc.)\n", + "- duration-like coordinates (e.g. 
\"step\", \"endStep\")" ] }, { @@ -2546,7 +2575,7 @@ " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0" ] @@ -2568,12 +2597,12 @@ "source": [ "When ``decode_times=False`` the following rules apply:\n", "\n", - "- coordinates representing date-like ecCodes keys (e.g. \"date\", \"validityDate\" etc.) will store the native GRIB int values (as yyyymmdd)\n", + "- coordinates representing date-like GRIB keys (e.g. \"date\", \"validityDate\" etc.) will store the native GRIB int values (as yyyymmdd)\n", "- datetime coordinates (e.g. \"forecast_reference_time\" etc.) will store datetime64 values\n", "\n", "When ``decode_timedelta=False`` the following rules apply:\n", "\n", - "- coordinates representing the time-like ecCodes keys (e.g. \"time\", \"validityTime\" etc.) will store the native GRIB int values (as 100*hours + minutes)\n", + "- coordinates representing the time-like GRIB keys (e.g. \"time\", \"validityTime\" etc.) will store the native GRIB int values (as 100*hours + minutes)\n", "- duration-like (e.g. \"step\") coordinates will store int values with units indicated by the coordinate attribute \"units\"" ] }, @@ -2596,7 +2625,7 @@ " * date (date) int64 16B 20240603 20240604\n", " * time (time) int64 16B 0 1200\n", " * step (step) int64 16B 0 6\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 
330.0 340.0 350.0" ] @@ -2626,9 +2655,7 @@ { "data": { "text/plain": [ - "{'standard_name': 'forecast_period',\n", - " 'long_name': 'time since forecast_reference_time',\n", - " 'units': 'hours'}" + "{'units': 'hours'}" ] }, "execution_count": 9, diff --git a/docs/examples/xarray_engine_to_grib.ipynb b/docs/examples/xarray_engine_to_grib.ipynb index a69e0264e..8f0efe4fd 100644 --- a/docs/examples/xarray_engine_to_grib.ipynb +++ b/docs/examples/xarray_engine_to_grib.ipynb @@ -58,7 +58,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "cf6d8ab8661b476ca0f076c67c8acfea", "version_major": 2, "version_minor": 0 }, @@ -69,13 +69,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. Use engine=`cfgrib` to invoke the cfgrib engine.\n" - ] - }, { "data": { "text/html": [ @@ -109,14 +102,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -171,6 +164,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -207,7 +201,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -218,7 
+212,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -290,15 +284,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -449,17 +443,17 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -471,34 +465,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -933,7 +927,7 @@ "tags": [] }, "source": [ - "The generated GRIB fieldlist can be saved to disk using the :py:meth:`~data.readers.grib.index.GribFieldList.save` method." + "The generated GRIB fieldlist can be saved to disk using the :func:`to_target` method." 
] }, { @@ -1035,7 +1029,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "120cbbf74d0c4edeac0cbdeed7bb6e2f", "version_major": 2, "version_minor": 0 }, @@ -1197,9 +1191,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1211,7 +1205,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_variable_key.ipynb b/docs/examples/xarray_engine_variable_key.ipynb index 23b95acc3..71c30dc70 100644 --- a/docs/examples/xarray_engine_variable_key.ipynb +++ b/docs/examples/xarray_engine_variable_key.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "08b75c56-0b2f-4cc6-9637-b28ad2aa4455", "metadata": { "editable": true, @@ -41,6 +41,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "41fcae342d1e49ac812d5756cd625b1f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00 span {\n", @@ -255,15 +270,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -414,69 +429,71 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
            "    type:         fc\n",
            "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 4, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -518,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "4898ceb3-4657-4397-b1d8-3bc1110b86eb", "metadata": { "editable": true, @@ -561,14 +578,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " 
--xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -623,6 +640,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -659,7 +677,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -670,7 +688,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -742,15 +760,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -914,30 +932,30 @@ " t500 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " t700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 700\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", @@ -954,20 +972,20 @@ " t500 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " t700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 700\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -993,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "f55fe598-7f80-4e9a-87b3-43131f78f2e1", "metadata": { "editable": true, @@ -1003,6 +1021,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3aaa4f09dc6e498c8df353d0bce3579e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_sfc.grib: 0%| | 0.00/390k [00:00 span {\n", @@ -1217,15 +1250,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1398,30 +1431,29 @@ " z700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z850 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", @@ -1447,20 +1479,19 @@ " z700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z850 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 8, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -1501,7 +1532,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "e1bc4e80-2efc-460c-96b0-26553bed7591", "metadata": { "editable": true, @@ -1511,6 +1542,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b3618ce82f674f7d9c28e3bc7cc87fa9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_sfc.grib: 0%| | 0.00/390k [00:00 span {\n", @@ -1725,15 +1771,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1906,30 +1952,29 @@ " z_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z_850_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", @@ -1955,20 +2000,19 @@ " z_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z_850_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 10, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1990,12 +2034,12 @@ "tags": [] }, "source": [ - "This technique is partuculary useful when the same parameter is available on multiple level types in the input data. In this case using \"param_level\" does not result in a full hypercube, however the same `remapping`` that we used above does." + "This technique is partuculary useful when the same parameter is available on multiple level types in the input data. In this case using \"param_level\" does not result in a full hypercube, however the same ``remapping`` that we used above does." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "id": "39cbb360-43c4-416e-956b-8b6cfadda26c", "metadata": { "editable": true, @@ -2005,6 +2049,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ad9990991ee44e4b81fad30f4cef90e4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_ml.grib: 0%| | 0.00/176k [00:00 span {\n", @@ -2219,15 +2278,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2395,29 +2454,28 @@ " u_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " u_90_ml (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: u\n", " class: od\n", " stream: oper\n", - " levtype: ml\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", - " levelist: 90\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 351kB\n", @@ -2438,19 +2496,18 @@ " u_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " u_90_ml (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: u\n", " class: od\n", " stream: oper\n", - " levtype: ml\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", - " levelist: 90\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 11, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -2478,9 +2535,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -2492,7 +2549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/guide/xarray/dim.rst b/docs/guide/xarray/dim.rst new file mode 100644 index 000000000..ef5f8adce --- /dev/null +++ b/docs/guide/xarray/dim.rst @@ -0,0 +1,110 @@ +.. _xr_dim: + +Dimensions +================== + +One of the most important aspect of the :ref:`xr_engine` is how it generates dimensions in the Xarray dataset with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. + +.. _xr_dim_roles: +.. _xr_predefined_dims: + + +Predefined dimensions and dimension roles +------------------------------------------- + +By default, a list of predefined dimensions are generated. Their order is fixed: + +- ensemble forecast member dimension +- temporal dimensions (controlled by ``time_dim_mode``) +- vertical dimensions (controlled by ``level_dim_mode``) + +The predefined dimensions are based on the ``dim_roles``, which is a mapping between the "roles" and the metadata keys associated with the roles. +The possible roles are as follows: + +.. 
    list-table:: Default dimension roles + :header-rows: 1 + + * - Dimension role + - Description + - Key (profile: :ref:`mars `) + - Key (profile: :ref:`grib `) + * - "number" + - metadata key interpreted as ensemble forecast members + - "number" + - "number" + * - "date" + - metadata key interpreted as date part of the "forecast_reference_time" + - "date" + - "date" + * - "time" + - metadata key interpreted as time part of the "forecast_reference_time" + - "time" + - "time" + * - "step" + - metadata key interpreted as forecast step + - "step_timedelta" + - "step_timedelta" + * - "forecast_reference_time" + - if not specified or None or empty the forecast reference time is built using the "date" and "time" roles + - None + - None + * - "valid_time" + - if not specified or None or empty the valid time is built using the "validityDate" and "validityTime" metadata keys + - None + - None + * - "level" + - metadata key interpreted as level + - "levelist" + - "level" + * - "level_type" + - metadata key interpreted as level type + - "levtype" + - "typeOfLevel" + +By default, the dimension names are the same as the role names. To use the associated metadata keys instead use the ``dim_name_from_role_name=False`` option. + +This behaviour is controlled with the ``dim_name_from_role_name`` option. If set to ``True`` (the default), the dimension names will be the same as the dimension roles; if set to ``False``, the dimension names will be the metadata keys the roles are mapped to. Using the role names is useful when you want to refer to the dimensions in your code, as they are more descriptive than the metadata keys. + +.. note:: + + For GRIB data, "step_timedelta" is a generated metadata key (by earthkit-data), which is the representation of the value of the "endStep" key as a `datetime.timedelta`. + + +Dimension modes +---------------------- + +The ``time_dim_mode`` and ``level_dim_mode`` options control how the temporal and vertical dimensions are generated in the Xarray dataset using ``dim_roles``. 
    
    See the following notebooks for examples of how these modes work: + +``time_dim_mode``: + +- :ref:`/examples/xarray_engine_temporal.ipynb` +- :ref:`/examples/xarray_engine_seasonal.ipynb` + + +``level_dim_mode``: +- :ref:`/examples/xarray_engine_level.ipynb` + + +Squeezing/ensuring dimensions +---------------------------------- + +By default, the dimensions are squeezed. This means that if a dimension has only one value, it is removed from the dataset. This can be controlled with the ``squeeze`` option. Alternatively, the ``ensure_dims`` option can be used to ensure that certain dimensions are always present in the dataset, even if they have only one value. This is useful when you want to keep the dimensions for consistency or for further processing. + +See the following notebooks for examples of how this works: + +- :ref:`/examples/xarray_engine_squeeze.ipynb` + + +Extra dimensions +---------------------- + +The ``extra_dims`` option allows adding extra dimensions to the Xarray dataset on top of the predefined ones. E.g. + + + +Fixed dimensions +---------------------- + + +Split dimensions +---------------------- diff --git a/docs/guide/xarray/overview.rst b/docs/guide/xarray/overview.rst index aa8031cb6..57af5d94e 100644 --- a/docs/guide/xarray/overview.rst +++ b/docs/guide/xarray/overview.rst @@ -25,12 +25,12 @@ We can convert :ref:`grib` data into an Xarray dataset by using :py:meth:`~data. Coordinates: * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202... * step (step) timedelta64[ns] 16B 00:00:00 06:00:00 - * levelist (levelist) int64 16B 500 700 + * level (level) int64 16B 500 700 * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0 * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0 Data variables: - r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ... - t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ... 
    
    + r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ... + t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ... ... .. note:: @@ -51,6 +51,11 @@ We can also use the Xarray engine to read GRIB data directly with the :py:func:` Size: 176kB ... +Dimensions +++++++++++ + +The pivotal question when generating the Xarray dataset is how to form the dimensions. The :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` method has a number of options to control the dimensions. Please see more details in the :ref:`dimensions ` section. + Profiles +++++++++ diff --git a/docs/release_notes/deprecations.rst b/docs/release_notes/deprecations.rst index 6d142fd63..110bab34e 100644 --- a/docs/release_notes/deprecations.rst +++ b/docs/release_notes/deprecations.rst @@ -1,6 +1,34 @@ Deprecations ============= + +.. _deprecated-0.15.0: + +Version 0.15.0 +----------------- + +.. _deprecated-ens-dim-role: + +The "ens" dimension role has been renamed to "number" +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +The name of the ensemble member :ref:`dimension role <xr_dim_roles>` changed to "number" from "ens" in the ``dim_roles`` option of :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. The old name is still available for backward compatibility but will be removed in a future release. + +.. list-table:: + :header-rows: 0 + + * - Deprecated code + * - + + .. literalinclude:: include/deprec_ens_dim_role.py + + * - New code + * - + + .. literalinclude:: include/migrated_ens_dim_role.py + + + .. 
    
    _deprecated-0.13.0: Version 0.13.0 diff --git a/docs/release_notes/include/deprec_ens_dim_role.py b/docs/release_notes/include/deprec_ens_dim_role.py new file mode 100644 index 000000000..965e63215 --- /dev/null +++ b/docs/release_notes/include/deprec_ens_dim_role.py @@ -0,0 +1,7 @@ +import earthkit.data as ekd + +ds_fl = ekd.from_source("sample", "ens_cf_pf.grib") + +ds = ds_fl.to_xarray( + dim_roles={"ens": "perturbationNumber"}, +) diff --git a/docs/release_notes/include/migrated_ens_dim_role.py b/docs/release_notes/include/migrated_ens_dim_role.py new file mode 100644 index 000000000..1c98f6af8 --- /dev/null +++ b/docs/release_notes/include/migrated_ens_dim_role.py @@ -0,0 +1,7 @@ +import earthkit.data as ekd + +ds_fl = ekd.from_source("sample", "ens_cf_pf.grib") + +ds = ds_fl.to_xarray( + dim_roles={"number": "perturbationNumber"}, +) diff --git a/docs/release_notes/version_0.15_updates.rst b/docs/release_notes/version_0.15_updates.rst index bc8a5d815..f53d1e020 100644 --- a/docs/release_notes/version_0.15_updates.rst +++ b/docs/release_notes/version_0.15_updates.rst @@ -5,13 +5,52 @@ Version 0.15 Updates Version 0.15.0 =============== +Deprecations ++++++++++++++++++++ + +- :ref:`deprecated-ens-dim-role` + Xarray engine ++++++++++++++++++++++++++++++ +Breaking changes +------------------- + +- Separated the dimension names from the metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the :ref:`dimension role <xr_dim_roles>`, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. it was "levtype" in the :ref:`default ` profile. The old behaviour can still be invoked by using the newly added ``dim_name_from_role_name=False`` option. See: :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. 
    
+ + +- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datetime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that due to this change when ``dim_name_from_role_name=False`` is used the step dimension will be called "step_timedelta" instead of "step". + + +Other changes +------------------- + +- Allowed using mappings in the ``extra_dims`` and ``fixed_dims`` options to define both the name of the dimensions and the metadata keys to generate their values. Previously, these options only took a single/multiple metadata keys. E.g. both the options below will generate the "expver", "mars_stream" and "mars_class" dimensions using the "expver", "stream" and "class" metadata keys. + + .. code-block:: python + + extra_dims = ["expver", {"mars_stream": "stream"}, ("mars_class", "class")] + extra_dims = { + "expver": "expver", + "mars_stream": "stream", + "mars_class": "class", + } + + - Improved the serialisation of GRIB fieldlists to reduce memory usage when Xarray is generated with chunks (:pr:`700`). See the :ref:`/examples/xarray_engine_chunks.ipynb` notebook example. - TensorBackendArray, which implements the lazy loading of DataArrays in the Xarray engine, now uses a ``dask.utils.SerializableLock`` when accessing the data (:pr:`700`). - Enabled converting :ref:`data-sources-lod` fieldlists into Xarray (:pr:`701`). See the :ref:`/examples/list_of_dicts_to_xarray.ipynb` notebook example. 
    +New Xarray engine notebooks +------------------------------ + +- :ref:`/examples/xarray_engine_step_ranges.ipynb` +- :ref:`/examples/xarray_engine_ensemble.ipynb` +- :ref:`/examples/xarray_engine_squeeze.ipynb` +- :ref:`/examples/xarray_engine_chunks.ipynb` +- :ref:`/examples/list_of_dicts_to_xarray.ipynb` + + New features +++++++++++++++++ diff --git a/src/earthkit/data/core/select.py b/src/earthkit/data/core/select.py index 2e6a1fdf2..104cc9c31 100644 --- a/src/earthkit/data/core/select.py +++ b/src/earthkit/data/core/select.py @@ -33,7 +33,7 @@ def normalize_selection(*args, **kwargs): or v is ALL or callable(v) or isinstance(v, (list, tuple, set, slice)) - or isinstance(v, (str, int, float, datetime.datetime)) + or isinstance(v, (str, int, float, datetime.datetime, datetime.timedelta)) ), f"Unsupported type: {type(v)} for key {k}" return _kwargs diff --git a/src/earthkit/data/indexing/tensor.py b/src/earthkit/data/indexing/tensor.py index 533f9d4f6..4e6460eb5 100644 --- a/src/earthkit/data/indexing/tensor.py +++ b/src/earthkit/data/indexing/tensor.py @@ -443,11 +443,38 @@ def _subset(self, indexes): ds = self.source[tuple(dataset_indexes)] return self.from_tensor(self, ds, coords) - def make_valid_datetime(self, dtype="datetime64[ns]"): + def make_valid_datetime(self, dims_map, dtype="datetime64[ns]"): # TODO: make it more general - dims_opt = [ - ["base_datetime", "step"], - ["base_datetime"], + + for k in ["valid_datetime", "valid_time"]: + if k in self.user_coords: + import datetime + + return (k,), [datetime.datetime.fromisoformat(x) for x in self.user_coords[k]] + + # in the tensor the dims.coords are GRIB keys + # dims_map is a mapping from dim names to GRIB keys + DIM_ROLES = { + "forecast_reference_time": ("forecast_reference_time", "base_datetime"), + "step": ("step_timedelta", "step", "ensStep", "stepRange"), + "date": ("date", "dataDate"), + "time": ("time", "dataTime"), + } + + # map dim roles to keys available in the tensor + keys = {} + for k in DIM_ROLES: + 
    
for d in dims_map: + if d.name == k: + keys[k] = d.key + break + if k not in keys: + for d in self.user_dims: + if d in DIM_ROLES[k]: + keys[k] = d + break + + DIM_COMBINATIONS = [ ["forecast_reference_time", "step"], ["forecast_reference_time"], ["date", "time", "step"], @@ -457,19 +484,14 @@ def make_valid_datetime(self, dtype="datetime64[ns]"): ["step"], ] - for k in ["valid_datetime", "valid_time"]: - if k in self.user_coords: - import datetime - - return (k,), [datetime.datetime.fromisoformat(x) for x in self.user_coords[k]] - - # print(f"{self.user_dims=}") - for dims in dims_opt: - if all(d in self.user_dims for d in dims): + for dims in DIM_COMBINATIONS: + if all(d in keys for d in dims): + dims_step = [keys[d] for d in dims] # use same dim order as in user_dims - dims = [d for d in dims if d in self.user_dims] + dims = [d for d in self.user_dims if d in dims_step] + assert len(dims) == len(dims_step), f"Duplicate dims in {dims}" other_dims = [d for d in self.user_dims if d not in dims] - # print(f"{dims=} {other_dims=}") + if other_dims: import datetime diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index 9de63e1cf..5f3bc2e51 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -565,7 +565,10 @@ def indexing_datetime(self): return self._datetime("indexingDate", "indexingTime") def step_timedelta(self): - return to_timedelta(self.get("step", None)) + v = self.get("endStep", None) + if v is None: + v = self.get("step", None) + return to_timedelta(v) def _datetime(self, date_key, time_key): date = self.get(date_key, None) diff --git a/src/earthkit/data/readers/grib/xarray.py b/src/earthkit/data/readers/grib/xarray.py index 84837d546..dcc6405d3 100644 --- a/src/earthkit/data/readers/grib/xarray.py +++ b/src/earthkit/data/readers/grib/xarray.py @@ -112,19 +112,62 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs A 
    variable or list of variables to drop from the dataset. Default is None. * rename_variables: dict, None Mapping to rename variables. Default is None. - * extra_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be used as additional dimensions on top of the - predefined dimensions. Only enabled when no ``fixed_dims`` is specified. Default is None. + * extra_dims: str, or iterable of str, None + Define additional dimensions on top of the predefined dimensions. Only enabled when no + ``fixed_dims`` is specified. Default is None. It can be a single item or a list. Each + item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + .. code-block:: python + + # use key "expver" as a dimension + extra_dims = "expver" + # use keys "expver" and "stream" as dimensions + extra_dims = ["expver", "stream"] + # define dimensions "expver", "mars_stream" and "mars_type" from + # metadata keys "expver", "stream" and "type" + extra_dims = [ + "expver", + {"mars_stream": "stream"}, + ("mars_type", "type"), + ] + extra_dims = [ + { + "expver": "expver", + "mars_stream": "stream", + "mars_type": "type", + } + ] + * drop_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be ignored as dimensions. Default is None. + Single or multiple dimensions to be ignored. Default is None. * ensure_dims: str, or iterable of str, None - Metadata key or list of metadata keys that should be used as dimensions even - when ``squeeze=True``. Default is None. + Dimension or dimensions that should be kept even when ``squeeze=True`` and their size + is only 1. Default is None. * fixed_dims: str, or iterable of str, None - Metadata key or list of metadata keys in the order they should be used as dimensions. When - defined no other dimensions will be used. Might be incompatible with other settings. - Default is None. 
    
    + Define all the dimensions to be generated. When used no other dimensions will be created. + Might be incompatible with other settings. Default is None. It can be a single item or a list. + Each item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g.: + + .. code-block:: python + + # use key "step" as a dimension + fixed_dims = "step" + # use keys "step" and "levelist" as dimensions + fixed_dims = ["step", "levelist"] + # define dimensions "step", "level" and "level_type" from + # metadata keys "step", "levelist" and "levtype" + fixed_dims = [ + "step", + {"level": "levelist"}, + ("level_type", "levtype"), + ] + fixed_dims = [ + {"step": "step", "level": "levelist", "level_type": "levtype"} + ] + * dim_roles: dict, None Specify the "roles" used to form the predefined dimensions. The predefined dimensions are automatically generated when no ``fixed_dims`` specified and comprise the following @@ -137,7 +180,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs ``dim_roles`` is a mapping between the "roles" and the metadata keys representing the roles. The possible roles are as follows: - - "ens": metadata key interpreted as ensemble forecast members + - "number": metadata key interpreted as ensemble forecast members - "date": metadata key interpreted as date part of the "forecast_reference_time" - "time": metadata key interpreted as time part of the "forecast_reference_time" - "step": metadata key interpreted as forecast step @@ -153,7 +196,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs .. 
    
    code-block:: python { - "ens": "number", + "number": "number", "date": "dataDate", "time": "dataTime", "step": "step", @@ -166,6 +209,11 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs ``dims_roles`` behaves differently to the other kwargs in the sense that it does not override but update the default values. So e.g. to change only "ens" in the defaults it is enough to specify: "dim_roles={"ens": "perturbationNumber"}. + * dim_name_from_role_name: bool, None + If True, the dimension names are formed from the role names. Otherwise the + dimension names are formed from the metadata keys specified in ``dim_roles``. + Its default value (None) expands to True unless the ``profile`` overwrites it. + Only used when no ``fixed_dims`` are specified. *New in version 0.15.0*. * rename_dims: dict, None Mapping to rename dimensions. Default is None. * dims_as_attrs: str, or iterable of str, None diff --git a/src/earthkit/data/utils/xarray/builder.py b/src/earthkit/data/utils/xarray/builder.py index f752ab3e3..bd4def9b9 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -297,7 +297,7 @@ def collect_date_coords(self, tensor): ): from .coord import Coord - _dims, _vals = tensor.make_valid_datetime() + _dims, _vals = tensor.make_valid_datetime(self.dims) if _dims is not None and _vals is not None: self.tensor_coords["valid_time"] = Coord.make("valid_time", _vals, dims=_dims) @@ -318,8 +318,13 @@ def build(self): # build dataset dataset = xarray.Dataset(xr_vars, coords=xr_coords, attrs=xr_attrs) - if self.profile.rename_dims_map(): - dataset = dataset.rename(self.profile.rename_dims_map()) + dataset = self.profile.rename_dataset_dims(dataset) + + # dim_map = self.profile.rename_dims_map() + # if dim_map: + # d = {k: v for k, v in dim_map.items() if k in dataset.dims} + # if d: + # dataset = dataset.rename(d) if "source" not in dataset.encoding: dataset.encoding["source"] = None @@ -544,7 +549,7 
    
@@ def parse(self, ds, profile=None, full=False): # LOG.debug(f"{remapping=}") # LOG.debug(f"{profile.remapping=}") - # LOG.debug(f"{profile.index_keys=}") + LOG.debug(f"{profile.index_keys=}") # create a new fieldlist for optimised access to unique values ds_xr = XArrayInputFieldList( diff --git a/src/earthkit/data/utils/xarray/coord.py b/src/earthkit/data/utils/xarray/coord.py index 1268e0118..32950fdcd 100644 --- a/src/earthkit/data/utils/xarray/coord.py +++ b/src/earthkit/data/utils/xarray/coord.py @@ -168,7 +168,10 @@ def attrs(self, name, profile): class MonthCoord(Coord): - pass + def attrs(self, name, profile): + attrs = super().attrs(name, profile) + attrs["units"] = "months" + return attrs class LevelCoord(Coord): diff --git a/src/earthkit/data/utils/xarray/defaults.yaml b/src/earthkit/data/utils/xarray/defaults.yaml index c4c4d1ccb..0aaa1ae9f 100644 --- a/src/earthkit/data/utils/xarray/defaults.yaml +++ b/src/earthkit/data/utils/xarray/defaults.yaml @@ -43,7 +43,7 @@ strict: false errors: raise dim_roles: - ens: number + number: number date: date time: time step: step @@ -52,6 +52,8 @@ dim_roles: level: level level_type: typeOfLevel +dim_name_from_role_name: true + coord_attrs: latitude: units: degrees_north diff --git a/src/earthkit/data/utils/xarray/diff.py b/src/earthkit/data/utils/xarray/diff.py index 1493c7689..8d4bb2a4f 100644 --- a/src/earthkit/data/utils/xarray/diff.py +++ b/src/earthkit/data/utils/xarray/diff.py @@ -7,6 +7,7 @@ # nor does it submit to any jurisdiction. 
# +import datetime import logging import math @@ -69,6 +70,8 @@ def _compare(v1, v2): return math.isclose(v1, v2, rel_tol=1e-9), ListDiff.VALUE_DIFF elif isinstance(v1, str) and isinstance(v2, str): return v1 == v2, ListDiff.VALUE_DIFF + elif isinstance(v1, datetime.timedelta) and isinstance(v2, datetime.timedelta): + return v1 == v2, ListDiff.VALUE_DIFF elif type(v1) is not type(v2): return False, ListDiff.TYPE_DIFF else: diff --git a/src/earthkit/data/utils/xarray/dim.py b/src/earthkit/data/utils/xarray/dim.py index b79aab9d3..9fd9c8cfa 100644 --- a/src/earthkit/data/utils/xarray/dim.py +++ b/src/earthkit/data/utils/xarray/dim.py @@ -42,7 +42,7 @@ class ParamLevelKey(CompoundKey): LEVEL_TYPE_KEYS = ["typeOfLevel", "levtype"] DATE_KEYS = ["date", "andate", "validityDate", "dataDate", "hdate", "referenceDate", "indexingDate"] TIME_KEYS = ["time", "antime", "validityTime", "dataTime", "referenceTime", "indexingTime"] -STEP_KEYS = ["step", "endStep", "stepRange", "forecastMonth", "fcmonth"] +STEP_KEYS = ["step_timedelta", "step", "endStep", "stepRange", "forecastMonth", "fcmonth"] MONTH_KEYS = ["forecastMonth", "fcmonth"] VALID_DATETIME_KEYS = ["valid_time", "valid_datetime"] BASE_DATETIME_KEYS = [ @@ -89,15 +89,18 @@ def find_alias(key, drop=None): return r -def make_dim(owner, name, *args, **kwargs): - if name in PREDEFINED_DIMS: - return PREDEFINED_DIMS[name](owner, *args, key=name, **kwargs) +def make_dim(owner, *args, name=None, key=None, **kwargs): + predef_key = key or name + + if predef_key in PREDEFINED_DIMS: + return PREDEFINED_DIMS[predef_key](owner, *args, name=name, key=key, **kwargs) ck = CompoundKey.make(name) if ck is not None: d = CompoundKeyDim(owner, ck) else: - d = OtherDim(owner, name, *args, **kwargs) + # print("args", args, "kwargs", kwargs, "name", name, "key", key) + d = OtherDim(owner, *args, name=name, key=key, **kwargs) return d @@ -123,6 +126,7 @@ class Dim: name = None key = None + label = None alias = None drop = None enforce_unique = 
False @@ -233,38 +237,43 @@ class NumberDim(Dim): class DateDim(Dim): name = "date" - drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop="date") class TimeDim(Dim): name = "time" - drop = get_keys(TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(TIME_KEYS + DATETIME_KEYS, drop="time") + + +# class StepDim(Dim): +# name = "step" +# drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop="step") class StepDim(Dim): name = "step" - drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop=name) + drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop=["step_timedelta"]) class ValidTimeDim(Dim): name = "valid_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="valid_time") class ForecastRefTimeDim(Dim): name = "forecast_reference_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="forecast_reference_time") alias = ["base_datetime"] class IndexingTimeDim(Dim): name = "indexing_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="indexing_time") class ReferenceTimeDim(Dim): name = "reference_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="reference_time") class CustomForecastRefDim(Dim): @@ -378,13 +387,41 @@ class OtherDim(Dim): pass +class DimRole: + NAMES = ("number", "date", "time", "step", "level", "level_type", "forecast_reference_time", "valid_time") + + def __init__(self, d, name_as_key=True): + self.d = d + self.name_as_key = name_as_key + + if "ens" in d: + import warnings + + warnings.warn("'ens' key in dim_roles is deprecated. 
Use 'number' instead", DeprecationWarning) + self.d["number"] = self.d.pop("ens") + + for k in d: + if k not in self.NAMES: + raise ValueError(f"Invalid dim role name={k}. Must be one of {self.NAMES}") + + def role(self, name, default=None, raise_error=True): + if name in self.d: + return self.d[name], name if self.name_as_key else self.d[name] + if default is not None: + return default + if raise_error: + raise ValueError(f"Dim role {name} not found in {self.d}") + else: + return default, default + + class DimMode: - default = [] + default = {} # maps key to name def build(self, profile, owner, active=True, dims=None): if not dims: dims = self.default - return {name: make_dim(owner, name, active=active) for name in dims} + return {name: make_dim(owner, name=name, key=key, active=active) for name, key in dims.items()} class ForecastTimeDimMode(DimMode): @@ -393,25 +430,26 @@ class ForecastTimeDimMode(DimMode): TIMES = ["time", "dataTime"] def build(self, profile, owner, active=True): - ref_time = owner.dim_roles.get("forecast_reference_time", None) - if ref_time == "forecast_reference_time": - ref_time_dim = ForecastRefTimeDim(owner, active=active) - elif ref_time: - ref_time_dim = make_dim(owner, ref_time, active=active) + ref_time_key, ref_time_name = owner.dim_roles.role("forecast_reference_time", raise_error=False) + + if ref_time_key == "forecast_reference_time": + ref_time_dim = ForecastRefTimeDim(owner, name=ref_time_name, active=active) + elif ref_time_key: + ref_time_dim = make_dim(owner, name=ref_time_name, key=ref_time_key, active=active) else: - date = owner.dim_roles["date"] - time = owner.dim_roles["time"] + date, _ = owner.dim_roles.role("date") + time, _ = owner.dim_roles.role("time") built_in = date in self.DATES and time in self.TIMES if built_in: - ref_time_dim = ForecastRefTimeDim(owner, active=active) + ref_time_dim = ForecastRefTimeDim(owner, name=ref_time_name, active=active) else: - ref_time_dim = CustomForecastRefDim(owner, [date, time], 
active=active) + ref_time_dim = CustomForecastRefDim(owner, [date, time], name=ref_time_name, active=active) - step = owner.dim_roles["step"] - step_dim = make_dim(owner, step, active=active) + step_key, step_name = owner.dim_roles.role("step") + step_dim = make_dim(owner, name=step_name, key=step_key, active=active) - self.register_ref_time_key(ref_time_dim.name) - self.register_step_key(step_dim.name) + self.register_ref_time_key(ref_time_dim.key) + self.register_step_key(step_dim.key) return {d.name: d for d in [ref_time_dim, step_dim]} @@ -429,30 +467,33 @@ def register_step_key(self, name): class ValidTimeDimMode(DimMode): name = "valid_time" - default = ["valid_time"] + default = {"valid_time": "valid_time"} class RawTimeDimMode(DimMode): name = "raw" - default = ["date", "time", "step"] def build(self, profile, owner, active=True): - date = owner.dim_roles["date"] - time = owner.dim_roles["time"] - step = owner.dim_roles["step"] - return super().build(profile, owner, active=active, dims=[date, time, step]) + dims = {} + for k in ["date", "time", "step"]: + key, name = owner.dim_roles.role(k) + dims[name] = key + return super().build(profile, owner, active=active, dims=dims) class LevelDimMode(DimMode): name = "level" def build(self, profile, owner, **kwargs): - level_key = owner.dim_roles["level"] - level_type_key = owner.dim_roles["level_type"] - return { - level_key: LevelDim(owner, key=level_key, **kwargs), - level_type_key: LevelTypeDim(owner, key=level_type_key, **kwargs), - } + # level + key, name = owner.dim_roles.role("level") + level_dim = LevelDim(owner, name=name, key=key, **kwargs) + + # level_type + key, name = owner.dim_roles.role("level_type") + level_type_dim = LevelTypeDim(owner, name=name, key=key, **kwargs) + + return {level_dim.key: level_dim, level_type_dim.key: level_type_dim} class LevelAndTypeDimMode(DimMode): @@ -460,8 +501,9 @@ class LevelAndTypeDimMode(DimMode): dim = LevelAndTypeDim def build(self, profile, owner, **kwargs): - 
level_key = owner.dim_roles["level"] - level_type_key = owner.dim_roles["level_type"] + + level_key, _ = owner.dim_roles.role("level") + level_type_key, _ = owner.dim_roles.role("level_type") return {self.name: self.dim(owner, level_key, level_type_key, **kwargs)} @@ -486,8 +528,8 @@ class NumberDimBuilder(DimBuilder): name = "number" def __init__(self, profile, owner): - ens_key = owner.dim_roles["ens"] - self.used = {self.name: NumberDim(owner, key=ens_key)} + key, name = owner.dim_roles.role("number") + self.used = {self.name: NumberDim(owner, name=name, key=key)} class TimeDimBuilder(DimBuilder): @@ -523,7 +565,23 @@ def __init__(self, profile, owner): DIM_BUILDERS = {v.name: v for v in [NumberDimBuilder, TimeDimBuilder, LevelDimBuilder]} -class Dims: +def ensure_dim_map(d): + if isinstance(d, dict): + return d + d = ensure_iterable(d) + r = {} + for k in d: + if isinstance(k, str): + r[k] = k + elif isinstance(k, tuple) and len(k) == 2: + r[k[0]] = k[1] + elif isinstance(k, dict): + for kk, vv in k.items(): + r[kk] = vv + return r + + +class DimHandler: def __init__( self, profile, @@ -534,6 +592,7 @@ def __init__( split_dims, rename_dims, dim_roles, + dim_name_from_role_name, dims_as_attrs, time_dim_mode, level_dim_mode, @@ -542,11 +601,12 @@ def __init__( self.profile = profile - self.dim_roles = dim_roles - self.extra_dims = ensure_iterable(extra_dims) + self.dim_roles = DimRole(dim_roles, name_as_key=dim_name_from_role_name) + # self.dim_name_from_role_name = dim_name_from_role_name + self.extra_dims = ensure_dim_map(extra_dims) self.drop_dims = ensure_iterable(drop_dims) self.ensure_dims = ensure_iterable(ensure_dims) - self.fixed_dims = ensure_iterable(fixed_dims) + self.fixed_dims = ensure_dim_map(fixed_dims) self.split_dims = ensure_iterable(split_dims) self.rename_dims_map = ensure_dict(rename_dims) self.dims_as_attrs = list(ensure_iterable(dims_as_attrs)) @@ -554,6 +614,19 @@ def __init__( self.level_dim_mode = level_dim_mode self.squeeze = squeeze + 
# if "ens" in self.dim_roles: + # Warning.deprecated("'ens' key in dim_roles is deprecated. Use 'number' instead") + # self.dim_roles["number"] = self.dim_roles.pop("ens") + + # if self.dim_name_from_role_name: + # d = {v: k for k, v in self.dim_roles.items()} + # for k in list(self.rename_dims_map.keys()): + # if k in self.dim_roles: + # d[self.dim_roles[k]] = self.rename_dims_map.pop(k) + + # d.update(self.rename_dims_map) + # self.rename_dims_map = d + self.var_key_dim = None if self.fixed_dims: @@ -594,6 +667,28 @@ def __init__( self.dims = dims + # LOG.debug(f"self.dims={self.dims}") + + # for d in self.dims.values(): + # if d.name != d.key: + # if d.name in self.rename_dims_map: + # self.rename_dims_map[d.key] = d.name + # else d + # if d.key not in self.rename_dims_map: + # self.rename_dims_map[d.key] = d.name + # else d + + # if self.dim_name_from_role_name: + # d = {v: k for k, v in self.dim_roles.items()} + # for k in list(self.rename_dims_map.keys()): + # if k in self.dim_roles: + # d[self.dim_roles[k]] = self.rename_dims_map.pop(k) + + # d.update(self.rename_dims_map) + # self.rename_dims_map = d + + self.var_key_dim = None + # ensure all the required keys are in the profile keys = [] for d in self.dims.values(): @@ -640,8 +735,9 @@ def _init_fixed_dims(self): # ) # ) - self.ensure_dims = [k for k in self.fixed_dims] - dims = {k: make_dim(self, name=k) for k in self.fixed_dims} + # self.ensure_dims = [k for k in self.fixed_dims] + self.ensure_dims = list(self.fixed_dims.keys()) + dims = {k: make_dim(self, name=k, key=v) for k, v in self.fixed_dims.items()} return dims def _init_dims(self): @@ -667,7 +763,7 @@ def _remove_duplicates(keys): var_keys = [self.profile.variable_key] # non-core dims - keys = self.extra_dims + self.ensure_dims + keys = list(self.extra_dims.keys()) + self.ensure_dims keys = _remove_duplicates(keys) remapping_dims = self._init_remapping_dims(keys) @@ -806,6 +902,23 @@ def get_dims(self, names): r.append(make_dim(self, name=name)) 
return r + def rename_dataset_dims(self, dataset): + # first rename the dimensions where the name and key are different + mapping = {} + for d in self.dims.values(): + if d.key in dataset.dims and d.name != d.key: + mapping[d.key] = d.name + if mapping: + dataset = dataset.rename(mapping) + + # then apply the user defined rename_dims_map + if self.rename_dims_map: + mapping = {k: v for k, v in self.rename_dims_map.items() if k in dataset.dims} + if mapping: + dataset = dataset.rename(mapping) + + return dataset + PREDEFINED_DIMS = {} for i, d in enumerate( diff --git a/src/earthkit/data/utils/xarray/engine.py b/src/earthkit/data/utils/xarray/engine.py index dd4f10d24..7a8789841 100644 --- a/src/earthkit/data/utils/xarray/engine.py +++ b/src/earthkit/data/utils/xarray/engine.py @@ -29,6 +29,7 @@ def open_dataset( ensure_dims=None, fixed_dims=None, dim_roles=None, + dim_name_from_role_name=None, rename_dims=None, dims_as_attrs=None, time_dim_mode=None, @@ -71,18 +72,61 @@ def open_dataset( rename_variables: dict, None Mapping to rename variables. Default is None. extra_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be used as additional dimensions on top of the - predefined dimensions. Only enabled when no ``fixed_dims`` is specified. Default is None. + Define additional dimensions on top of the predefined dimensions. Only enabled when no + ``fixed_dims`` is specified. Default is None. It can be a single item or a list. Each + item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + .. 
code-block:: python + + # use key "expver" as a dimension + extra_dims = "expver" + # use keys "expver" and "stream" as a dimension + extra_dims = ["expver", "stream"] + # define dimensions "expver", "mars_stream" and "mars_type" from + # metadata keys "expver", "stream" and "type" + extra_dims = [ + "expver", + {"mars_stream": "stream"}, + ("mars_type", "type"), + ] + extra_dims = [ + { + "expver": "expver", + "mars_stream": "stream", + "mars_type": "type", + } + ] + drop_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be ignored as dimensions. Default is None. + Single or multiple dimensions to be ignored. Default is None. ensure_dims: str, or iterable of str, None - Metadata key or list of metadata keys that should be used as dimensions even - when ``squeeze=True``. Default is None. + Dimension or dimensions that should be kept even when ``squeeze=True`` and their size + is only 1. Default is None. fixed_dims: str, or iterable of str, None - Metadata key or list of metadata keys in the order they should be used as dimensions. When - defined no other dimensions will be used. Might be incompatible with other settings. - Default is None. + Define all the dimensions to be generated. When used no other dimensions will be created. + Might be incompatible with other settings. Default is None. It can be a single item or a list. + Each item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + .. 
code-block:: python + + # use key "step" as a dimension + fixed_dims = "step" + # use keys "step" and "levelist" as a dimension + fixed_dims = ["step", "levelist"] + # define dimensions "step", "level" and "level_type" from + # metadata keys "step", "levelist" and "levtype" + fixed_dims = [ + "step", + {"level": "levelist"}, + ("level_type", "levtype"), + ] + fixed_dims = [ + {"step": "step", "level": "levelist", "level_type": "levtype"} + ] + dim_roles: dict, None Specify the "roles" used to form the predefined dimensions. The predefined dimensions are automatically generated when no ``fixed_dims`` specified and comprise the following @@ -95,7 +139,7 @@ def open_dataset( ``dim_roles`` is a mapping between the "roles" and the metadata keys representing the roles. The possible roles are as follows: - - "ens": metadata key interpreted as ensemble forecast members + - "number": metadata key interpreted as ensemble forecast members - "date": metadata key interpreted as date part of the "forecast_reference_time" - "time": metadata key interpreted as time part of the "forecast_reference_time" - "step": metadata key interpreted as forecast step @@ -111,7 +155,7 @@ def open_dataset( .. code-block:: python { - "ens": "number", + "number": "number", "date": "dataDate", "time": "dataTime", "step": "step", @@ -123,7 +167,12 @@ def open_dataset( ``dims_roles`` behaves differently to the other kwargs in the sense that it does not override but update the default values. So e.g. to change only "ens" in - the defaults it is enough to specify: "dim_roles={"ens": "perturbationNumber"}. + the defaults it is enough to specify: "dim_roles={"number": "perturbationNumber"}. + dim_name_from_role_name: bool, None + If True, the dimension names are formed from the role names. Otherwise the + dimension names are formed from the metadata keys specified in ``dim_roles``. + Its default value (None) expands to True unless the ``profile`` overwrites it. 
+ Only used when no ``fixed_dims`` are specified. *New in version 0.15.0*. rename_dims: dict, None Mapping to rename dimensions. Default is None. dims_as_attrs: str, or iterable of str, None @@ -266,6 +315,7 @@ def open_dataset( fixed_dims=fixed_dims, rename_dims=rename_dims, dim_roles=dim_roles, + dim_name_from_role_name=dim_name_from_role_name, dims_as_attrs=dims_as_attrs, time_dim_mode=time_dim_mode, level_dim_mode=level_dim_mode, diff --git a/src/earthkit/data/utils/xarray/fieldlist.py b/src/earthkit/data/utils/xarray/fieldlist.py index 474cd62e5..c4d6c441e 100644 --- a/src/earthkit/data/utils/xarray/fieldlist.py +++ b/src/earthkit/data/utils/xarray/fieldlist.py @@ -7,7 +7,7 @@ # nor does it submit to any jurisdiction. # - +import datetime import logging from collections import defaultdict @@ -210,7 +210,7 @@ def unique_values(self, names, component=False): for k, v in vals.items(): v = [x for x in v if x is not None] - if all(isinstance(x, int) for x in v): + if all(isinstance(x, (int, datetime.timedelta)) for x in v): vals[k] = sorted(v) else: vals[k] = sorted(v, key=str) diff --git a/src/earthkit/data/utils/xarray/grib.yaml b/src/earthkit/data/utils/xarray/grib.yaml index 9384a00ac..05d544e9e 100644 --- a/src/earthkit/data/utils/xarray/grib.yaml +++ b/src/earthkit/data/utils/xarray/grib.yaml @@ -1,8 +1,8 @@ dim_roles: - ens: number + number: number date: dataDate time: dataTime - step: step + step: step_timedelta level: level level_type: typeOfLevel diff --git a/src/earthkit/data/utils/xarray/mars.yaml b/src/earthkit/data/utils/xarray/mars.yaml index 1d17dcdbf..5c07bfed6 100644 --- a/src/earthkit/data/utils/xarray/mars.yaml +++ b/src/earthkit/data/utils/xarray/mars.yaml @@ -1,8 +1,8 @@ dim_roles: - ens: number + number: number date: date time: time - step: step + step: step_timedelta level: levelist level_type: levtype diff --git a/src/earthkit/data/utils/xarray/profile.py b/src/earthkit/data/utils/xarray/profile.py index 7e1bbd843..bdf8b60f8 100644 --- 
a/src/earthkit/data/utils/xarray/profile.py +++ b/src/earthkit/data/utils/xarray/profile.py @@ -96,7 +96,7 @@ def __init__( **kwargs, ): from .attrs import Attrs - from .dim import Dims + from .dim import DimHandler self._kwargs = dict(**kwargs) self.name = name @@ -116,7 +116,7 @@ def __init__( self.rename_variables_map = kwargs.pop("rename_variables") # dims - self.dims = Dims( + self.dims = DimHandler( self, kwargs.pop("extra_dims"), kwargs.pop("drop_dims"), @@ -125,6 +125,7 @@ def __init__( kwargs.pop("split_dims"), kwargs.pop("rename_dims"), kwargs.pop("dim_roles"), + kwargs.pop("dim_name_from_role_name"), kwargs.pop("dims_as_attrs"), kwargs.pop("time_dim_mode"), kwargs.pop("level_dim_mode"), @@ -346,3 +347,6 @@ def rename_dims_map(self): def rename_variable(self, v): return self.rename_variables_map.get(v, v) + + def rename_dataset_dims(self, dataset): + return self.dims.rename_dataset_dims(dataset) diff --git a/tests/xr_engine/test_xr_attrs.py b/tests/xr_engine/test_xr_attrs.py index 92f1add25..5639ecd72 100644 --- a/tests/xr_engine/test_xr_attrs.py +++ b/tests/xr_engine/test_xr_attrs.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. 
# +import datetime import os import sys @@ -47,14 +48,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {}, ), ( @@ -65,14 +67,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {"levtype": 2}, ), ( @@ -83,14 +86,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {}, ), ( @@ -101,14 +105,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {"levtype": 2}, ), ], @@ -151,14 +156,15 @@ def test_xr_dims_as_attrs(kwargs, coords, dims, 
attrs): "decode_times": False, "decode_timedelta": False, "strict": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, { "shortName": "t", "levtype": "pl", diff --git a/tests/xr_engine/test_xr_dims.py b/tests/xr_engine/test_xr_dims.py index d3f4e3e97..d2fb5dc97 100644 --- a/tests/xr_engine/test_xr_dims.py +++ b/tests/xr_engine/test_xr_dims.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. # +import datetime import os import sys @@ -116,22 +117,25 @@ def test_xr_dims_input_fieldlist(): @pytest.mark.parametrize( "kwargs,var_key,variables,dim_keys", [ - ({}, "param", ["r", "t"], ["step", "levelist"]), + ({}, "param", ["r", "t"], ["step_timedelta", "levelist"]), ( - {"time_dim_mode": "forecast"}, + {"time_dim_mode": "forecast", "dim_name_from_role_name": False}, "param", ["r", "t"], - ["step", "levelist"], + ["step_timedelta", "levelist"], ), ( - {"squeeze": False, "time_dim_mode": "raw"}, + {"squeeze": False, "time_dim_mode": "raw", "dim_name_from_role_name": False}, "param", ["r", "t"], - ["time", "step", "levelist"], + ["time", "step_timedelta", "levelist"], ), ], ) def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): + """Test for the internal profile/dimension object. 
Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_LEV, prof) # prof.update(ds, _attributes(ds)) @@ -144,128 +148,144 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): @pytest.mark.parametrize( "kwargs,var_key,variables,dims", [ - # ({"time_dim_mode": "raw"}, "param", ["r", "t"], ["date", "time", "step", "levelist"]), ( - {"time_dim_mode": "forecast"}, + {"time_dim_mode": "forecast", "dim_name_from_role_name": False}, "param", ["r", "t"], - ["forecast_reference_time", "step", "levelist", "levtype"], + ["forecast_reference_time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "param_level"}, + {"time_dim_mode": "raw", "variable_key": "param_level", "dim_name_from_role_name": False}, "param_level", ["r1000", "r850", "t1000", "t850"], - ["date", "time", "step", "levtype"], + ["date", "time", "step_timedelta", "levtype"], ), - # ( - # {"time_dim_mode": "raw", "extra_dims": "param_level"}, - # "param_level", - # [ - # "r1000", - # "r850", - # "t1000", - # "t850", - # ], - # ["date"], - # ), ( { "time_dim_mode": "raw", "variable_key": "param_level", "remapping": {"param_level": "{param}_{level}"}, + "dim_name_from_role_name": False, }, "param_level", ["r_1000", "r_850", "t_1000", "t_850"], - ["date", "time", "step", "levtype"], + ["date", "time", "step_timedelta", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "shortName"}, + {"time_dim_mode": "raw", "variable_key": "shortName", "dim_name_from_role_name": False}, "shortName", ["r", "t"], - ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "shortName", "drop_variables": ["r"]}, + { + "time_dim_mode": "raw", + "variable_key": "shortName", + "drop_variables": ["r"], + "dim_name_from_role_name": False, + }, 
"shortName", ["t"], - ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "param_level", "drop_variables": ["r", "r1000"]}, + { + "time_dim_mode": "raw", + "variable_key": "param_level", + "drop_variables": ["r", "r1000"], + "dim_name_from_role_name": False, + }, "param_level", ["r850", "t1000", "t850"], [ "date", "time", - "step", + "step_timedelta", "levtype", ], ), - # ( - # {"use_level_per_type_dim": True}, - # "param", - # ["r", "t"], - # {"date": ["20210101", "20210102"], "level_per_type": ["850pl", "1000pl"]}, - # ), ( - {"time_dim_mode": "raw", "level_dim_mode": "level_and_type"}, + {"time_dim_mode": "raw", "level_dim_mode": "level_and_type", "dim_name_from_role_name": False}, "param", ["r", "t"], { "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "level_and_type": ["1000pl", "850pl"], }, ), ( - {"time_dim_mode": "raw", "extra_dims": "class"}, + {"time_dim_mode": "raw", "extra_dims": "class", "dim_name_from_role_name": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), ( - {"time_dim_mode": "raw", "ensure_dims": "class"}, + {"time_dim_mode": "raw", "ensure_dims": "class", "dim_name_from_role_name": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], + "step_timedelta": [datetime.timedelta(hours=0)], + "levelist": [850, 1000], + "levtype": ["pl"], + }, + ), + ( + {"time_dim_mode": "raw", "ensure_dims": ["class", "step"], "dim_name_from_role_name": False}, + "param", + ["r", "t"], + { + "class": ["od"], "step": [0], + "date": ["20210101", "20210102"], + "time": ["12"], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), ( - 
{"time_dim_mode": "raw", "ensure_dims": ["class", "step"]}, + { + "time_dim_mode": "raw", + "ensure_dims": ["class", "step_timedelta"], + "dim_name_from_role_name": False, + }, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), ( - {"time_dim_mode": "raw", "extra_dims": "class", "squeeze": False}, + { + "time_dim_mode": "raw", + "extra_dims": "class", + "squeeze": False, + "dim_name_from_role_name": False, + }, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, @@ -273,6 +293,9 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): ], ) def test_xr_dims_ds_date_lev(kwargs, var_key, variables, dims): + """Test for the internal profile/dimension object. Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_DATE_LEV, prof, remapping=prof.remapping.build()) @@ -303,13 +326,16 @@ def test_xr_dims_ds_date_lev(kwargs, var_key, variables, dims): {"time_dim_mode": "raw"}, "param", ["2t", "msl", "r", "t"], - ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist", "levtype"]), # ({"squeeze": False}, "param", ["r", "t"], ["time", "step", "levelist", "levtype"]), ], ) def test_xr_dims_ds_sfc_and_pl(kwargs, var_key, variables, dim_keys): + """Test for the internal profile/dimension object. 
Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_DATE_SFC_PL, prof) # prof.update(ds, _attributes(ds)) @@ -324,7 +350,20 @@ def test_xr_dims_ds_sfc_and_pl(kwargs, var_key, variables, dim_keys): "kwargs,dim_keys", [ ( - {"profile": "mars", "time_dim_mode": "raw", "rename_dims": {"levelist": "zz"}}, + { + "profile": "mars", + "time_dim_mode": "raw", + "rename_dims": {"levelist": "zz"}, + "dim_name_from_role_name": False, + }, + ["date", "time", "step_timedelta", "zz"], + ), + ( + { + "profile": "mars", + "time_dim_mode": "raw", + "rename_dims": {"level": "zz"}, + }, ["date", "time", "step", "zz"], ), ], @@ -339,3 +378,114 @@ def test_xr_rename_dims(kwargs, dim_keys): for v in ds: compare_dim_order(ds, dim_keys, v) + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dim_keys", + [ + ( + { + "profile": "mars", + "fixed_dims": ["date", "time", "step", "level"], + }, + ["date", "time", "step", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["level", "date", "time", "step"], + }, + ["level", "date", "time", "step"], + ), + ( + { + "profile": "mars", + "fixed_dims": [{"my_date": "date"}, ("my_time", "time"), "step", "level"], + }, + ["my_date", "my_time", "step", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["forecast_reference_time", "endStep", "level"], + }, + ["forecast_reference_time", "endStep", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["forecast_reference_time", ("step", "endStep"), "level"], + }, + ["forecast_reference_time", "step", "level"], + ), + ], +) +def test_xr_fixed_dims(kwargs, dim_keys): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) + ds = ds_ek.to_xarray(**kwargs) + num = len(ds) + + dim_keys = dim_keys + ["latitude", "longitude"] + assert len(ds) == num + + for v in ds: + compare_dim_order(ds, dim_keys, 
v) + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dim_keys", + [ + ( + { + "profile": "mars", + "drop_dims": "number", + "time_dim_mode": "raw", + "squeeze": False, + "dim_name_from_role_name": True, + }, + ["date", "time", "step", "level", "level_type"], + ), + ( + { + "profile": "mars", + "drop_dims": ["level_type", "number"], + "time_dim_mode": "raw", + "squeeze": False, + "dim_name_from_role_name": True, + }, + ["date", "time", "step", "level"], + ), + ( + { + "profile": "mars", + "drop_dims": "number", + "time_dim_mode": "raw", + "squeeze": False, + "dim_name_from_role_name": False, + }, + ["date", "time", "step_timedelta", "levelist", "levtype"], + ), + ( + { + "profile": "mars", + "drop_dims": ["levtype", "number"], + "time_dim_mode": "raw", + "squeeze": False, + "dim_name_from_role_name": False, + }, + ["date", "time", "step_timedelta", "levelist"], + ), + ], +) +def test_xr_drop_dims(kwargs, dim_keys): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) + ds = ds_ek.to_xarray(**kwargs) + num = len(ds) + + dim_keys = dim_keys + ["latitude", "longitude"] + assert len(ds) == num + + for v in ds: + compare_dim_order(ds, dim_keys, v) diff --git a/tests/xr_engine/test_xr_engine.py b/tests/xr_engine/test_xr_engine.py index f8d9be3e9..7c574b61a 100644 --- a/tests/xr_engine/test_xr_engine.py +++ b/tests/xr_engine/test_xr_engine.py @@ -20,6 +20,7 @@ here = os.path.dirname(__file__) sys.path.insert(0, here) +from xr_engine_fixtures import compare_coords # noqa: E402 from xr_engine_fixtures import load_grib_data # noqa: E402 @@ -63,12 +64,16 @@ def test_xr_engine_basic(file): @pytest.mark.cache @pytest.mark.parametrize("api", ["earthkit", "xr"]) -def test_xr_engine_detailed_check(api): +def test_xr_engine_detailed_check_1(api): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", "level", "pl.grib")) if api == "earthkit": ds = ds_ek.to_xarray( - time_dim_mode="raw", 
decode_times=False, decode_timedelta=False, add_valid_time_coord=False + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + dim_name_from_role_name=False, ) else: import xarray as xr @@ -80,6 +85,7 @@ def test_xr_engine_detailed_check(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, + dim_name_from_role_name=False, ) assert ds is not None @@ -92,7 +98,7 @@ def test_xr_engine_detailed_check(api): coords_ref_full = { "date": np.array([20240603, 20240604]), "time": np.array([0, 1200]), - "step": np.array([0, 6]), + "step_timedelta": [0, 6], "levelist": np.array([300, 400, 500, 700, 850, 1000]), "latitude": lats, "longitude": lons, @@ -101,16 +107,14 @@ def test_xr_engine_detailed_check(api): dims_ref_full = { "date": 2, "time": 2, - "step": 2, + "step_timedelta": 2, "levelist": 6, "latitude": 19, "longitude": 36, } assert len(ds.dims) == len(dims_ref_full) - assert len(ds.coords) == len(coords_ref_full) - for k, v in coords_ref_full.items(): - assert np.allclose(ds.coords[k].values, v) + compare_coords(ds, coords_ref_full) assert [v for v in ds.data_vars] == data_vars # data variable @@ -119,47 +123,37 @@ def test_xr_engine_detailed_check(api): assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 19, 36) assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 19, 36) r = ds["u"] - assert len(r.coords) == len(coords_ref_full) - for k, v in coords_ref_full.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref_full) # sel() on dataset r = ds.sel(date=20240603, time=[0, 1200]) coords_ref = dict(coords_ref_full) coords_ref["date"] = np.array([20240603]) - assert len(r.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) assert [v for v in r.data_vars] == data_vars # sel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 19, 36) - r1 = r["u"].sel(step=6, levelist=[1000, 300]) 
+ r1 = r["u"].sel(step_timedelta=6, levelist=[1000, 300]) assert r1.shape == (2, 2, 19, 36) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = [6] coords_ref["levelist"] = np.array([1000, 300]) - assert len(r1.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r1.coords[k].values, v) + compare_coords(r1, coords_ref) # isel() on dataset r = ds.isel(date=0, time=[0, 1]) coords_ref = dict(coords_ref_full) coords_ref["date"] = np.array([20240603]) - assert len(r.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) assert [v for v in r.data_vars] == data_vars # isel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 19, 36) - r1 = r["u"].isel(step=1, levelist=[0, -1]) + r1 = r["u"].isel(step_timedelta=1, levelist=[0, -1]) assert r1.shape == (2, 2, 19, 36) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = [6] coords_ref["levelist"] = np.array([300, 1000]) - assert len(r1.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r1.coords[k].values, v) + compare_coords(r1, coords_ref) # slicing of data variable da = ds["u"] @@ -173,8 +167,7 @@ def test_xr_engine_detailed_check(api): assert len(r.dims) == len(dims_ref) coords_ref = dict(coords_ref_full) coords_ref["time"] = np.array([0]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) r = da[:, 0, :, 3:5] assert r.shape == (2, 2, 2, 19, 36) @@ -186,8 +179,7 @@ def test_xr_engine_detailed_check(api): coords_ref = dict(coords_ref_full) coords_ref["time"] = np.array([0]) coords_ref["levelist"] = np.array([700, 850]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) r = da.loc[:, 0, :, [700, 850]] assert r.shape == (2, 2, 2, 19, 36) @@ -199,8 +191,184 @@ def test_xr_engine_detailed_check(api): coords_ref = 
dict(coords_ref_full) coords_ref["time"] = np.array([0]) coords_ref["levelist"] = np.array([700, 850]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) + + # lat-lon + da = ds["t"] + + r = da[:, 0, :, 2, 9, 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + r = da[:, 0, :, 2, 9:12, :2] + assert r.shape == (2, 2, 3, 2) + vals_ref = np.array( + [ + [ + [ + [269.00918579, 269.31680298], + [269.70254517, 269.81387329], + [267.50527954, 266.83828735], + ], + [ + [268.78610229, 268.80758667], + [269.52731323, 269.75680542], + [266.61813354, 267.12106323], + ], + ], + [ + [ + [268.57771301, 269.03767395], + [269.33357239, 269.56111145], + [264.75154114, 266.55036926], + ], + [ + [268.08932495, 268.35983276], + [269.01803589, 269.02389526], + [264.29733276, 266.08248901], + ], + ], + ] + ) + assert np.allclose(r.values, vals_ref) + + r = da.loc[:, 0, :, 500, 0, 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + +@pytest.mark.cache +@pytest.mark.parametrize("api", ["earthkit", "xr"]) +def test_xr_engine_detailed_check_2(api): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", "level", "pl.grib")) + + if api == "earthkit": + ds = ds_ek.to_xarray( + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + dim_name_from_role_name=True, + ) + else: + import xarray as xr + + ds = xr.open_dataset( + ds_ek.path, + engine="earthkit", + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + dim_name_from_role_name=True, + ) + + assert ds is not None + + # dataset + lats = np.linspace(90, -90, 19) + lons = np.linspace(0, 350, 36) + data_vars = ["r", "t", "u", "v", "z"] + + coords_ref_full 
= { + "date": np.array([20240603, 20240604]), + "time": np.array([0, 1200]), + "step": [0, 6], + "level": np.array([300, 400, 500, 700, 850, 1000]), + "latitude": lats, + "longitude": lons, + } + + dims_ref_full = { + "date": 2, + "time": 2, + "step": 2, + "level": 6, + "latitude": 19, + "longitude": 36, + } + + assert len(ds.dims) == len(dims_ref_full) + compare_coords(ds, coords_ref_full) + assert [v for v in ds.data_vars] == data_vars + + # data variable + assert ds["u"].shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].values.shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 19, 36) + r = ds["u"] + compare_coords(r, coords_ref_full) + + # sel() on dataset + r = ds.sel(date=20240603, time=[0, 1200]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # sel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 19, 36) + r1 = r["u"].sel(step=6, level=[1000, 300]) + assert r1.shape == (2, 2, 19, 36) + coords_ref["step"] = [6] + coords_ref["level"] = np.array([1000, 300]) + compare_coords(r1, coords_ref) + + # isel() on dataset + r = ds.isel(date=0, time=[0, 1]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # isel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 19, 36) + r1 = r["u"].isel(step=1, level=[0, -1]) + assert r1.shape == (2, 2, 19, 36) + coords_ref["step"] = [6] + coords_ref["level"] = np.array([300, 1000]) + compare_coords(r1, coords_ref) + + # slicing of data variable + da = ds["u"] + + r = da[:, 0] + assert r.shape == (2, 2, 6, 19, 36) + assert r.values.shape == (2, 2, 6, 19, 36) + assert r.to_numpy().shape == (2, 2, 6, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert 
len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + compare_coords(r, coords_ref) + + r = da[:, 0, :, 3:5] + assert r.shape == (2, 2, 2, 19, 36) + assert r.values.shape == (2, 2, 2, 19, 36) + assert r.to_numpy().shape == (2, 2, 2, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + r = da.loc[:, 0, :, [700, 850]] + assert r.shape == (2, 2, 2, 19, 36) + assert r.values.shape == (2, 2, 2, 19, 36) + assert r.to_numpy().shape == (2, 2, 2, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) # lat-lon da = ds["t"] @@ -253,7 +421,7 @@ def test_xr_engine_detailed_check(api): @pytest.mark.parametrize("lazy_load", [False, True]) @pytest.mark.parametrize("release_source", [False, True]) @pytest.mark.parametrize("direct_backend", [False, True]) -def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, direct_backend): +def test_xr_engine_detailed_flatten_check_1(stream, lazy_load, release_source, direct_backend): filename = "test-data/xr_engine/level/pl.grib" ds_ek, ds_ek_ref = load_grib_data(filename, "url", stream=stream) @@ -268,6 +436,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir "lazy_load": lazy_load, "release_source": release_source, "direct_backend": direct_backend, + "dim_name_from_role_name": False, } } } @@ -284,7 +453,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir coords_ref_full = { "date": np.array([20240603, 20240604]), "time": np.array([0, 1200]), - "step": np.array([0, 6]), + "step_timedelta": np.array([0, 6]), 
"levelist": np.array([300, 400, 500, 700, 850, 1000]), "latitude": lats, "longitude": lons, @@ -293,7 +462,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir dims_ref_full = { "date": 2, "time": 2, - "step": 2, + "step_timedelta": 2, "levelist": 6, "values": 684, } @@ -325,9 +494,9 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir # sel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 684) - r1 = r["u"].sel(step=6, levelist=[1000, 300]) + r1 = r["u"].sel(step_timedelta=6, levelist=[1000, 300]) assert r1.shape == (2, 2, 684) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = np.array([6]) coords_ref["levelist"] = np.array([1000, 300]) assert len(r1.coords) == len(coords_ref) for k, v in coords_ref.items(): @@ -344,9 +513,9 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir # isel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 684) - r1 = r["u"].isel(step=1, levelist=[0, -1]) + r1 = r["u"].isel(step_timedelta=1, levelist=[0, -1]) assert r1.shape == (2, 2, 684) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = np.array([6]) coords_ref["levelist"] = np.array([300, 1000]) assert len(r1.coords) == len(coords_ref) for k, v in coords_ref.items(): @@ -426,6 +595,170 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir assert np.allclose(r.values, vals_ref) +@pytest.mark.cache +@pytest.mark.parametrize("stream", [False, True]) +@pytest.mark.parametrize("lazy_load", [False, True]) +@pytest.mark.parametrize("release_source", [False, True]) +@pytest.mark.parametrize("direct_backend", [False, True]) +def test_xr_engine_detailed_flatten_check_2(stream, lazy_load, release_source, direct_backend): + filename = "test-data/xr_engine/level/pl.grib" + ds_ek, ds_ek_ref = load_grib_data(filename, "url", stream=stream) + + kwargs = { + "xarray_open_dataset_kwargs": { + 
"backend_kwargs": { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "flatten_values": True, + "add_valid_time_coord": False, + "lazy_load": lazy_load, + "release_source": release_source, + "direct_backend": direct_backend, + "dim_name_from_role_name": True, + } + } + } + + ds = ds_ek.to_xarray(**kwargs) + assert ds is not None + + # dataset + ll = ds_ek_ref[0].to_latlon(flatten=True) + lats = ll["lat"] + lons = ll["lon"] + data_vars = ["r", "t", "u", "v", "z"] + + coords_ref_full = { + "date": np.array([20240603, 20240604]), + "time": np.array([0, 1200]), + "step": np.array([0, 6]), + "level": np.array([300, 400, 500, 700, 850, 1000]), + "latitude": lats, + "longitude": lons, + } + + dims_ref_full = { + "date": 2, + "time": 2, + "step": 2, + "level": 6, + "values": 684, + } + + assert len(ds.dims) == len(dims_ref_full) + compare_coords(ds, coords_ref_full) + assert [v for v in ds.data_vars] == data_vars + + # data variable + assert ds["u"].shape == (2, 2, 2, 6, 684) + assert ds["u"].values.shape == (2, 2, 2, 6, 684) + assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 684) + assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 684) + r = ds["u"] + compare_coords(r, coords_ref_full) + + # sel() on dataset + r = ds.sel(date=20240603, time=[0, 1200]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # sel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 684) + r1 = r["u"].sel(step=6, level=[1000, 300]) + assert r1.shape == (2, 2, 684) + coords_ref["step"] = np.array([6]) + coords_ref["level"] = np.array([1000, 300]) + compare_coords(r1, coords_ref) + + # isel() on dataset + r = ds.isel(date=0, time=[0, 1]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # isel() on data variable of filtered 
dataset + assert r["u"].shape == (2, 2, 6, 684) + r1 = r["u"].isel(step=1, level=[0, -1]) + assert r1.shape == (2, 2, 684) + coords_ref["step"] = np.array([6]) + coords_ref["level"] = np.array([300, 1000]) + compare_coords(r1, coords_ref) + + # slicing of data variable + da = ds["u"] + + r = da[:, 0] + assert r.shape == (2, 2, 6, 684) + assert r.values.shape == (2, 2, 6, 684) + assert r.to_numpy().shape == (2, 2, 6, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + compare_coords(r, coords_ref) + + r = da[:, 0, :, 3:5] + assert r.shape == (2, 2, 2, 684) + assert r.values.shape == (2, 2, 2, 684) + assert r.to_numpy().shape == (2, 2, 2, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + r = da.loc[:, 0, :, [700, 850]] + assert r.shape == (2, 2, 2, 684) + assert r.values.shape == (2, 2, 2, 684) + assert r.to_numpy().shape == (2, 2, 2, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + # level=500, lat=0, lon=0 + da = ds["t"] + + r = da[:, 0, :, 2, 9 * 36 + 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + r = da[:, 0, :, 2, [9 * 36, 10 * 36, 11 * 36]] + assert r.shape == (2, 2, 3) + vals_ref = np.array( + [ + [ + [269.00918579, 269.70254517, 267.50527954], + [268.78610229, 269.52731323, 266.61813354], + ], + [ + [268.57771301, 269.33357239, 264.75154114], + [268.08932495, 269.01803589, 264.29733276], + ], + ] + ) + + v_ek = 
ds_ek_ref.sel(param="t", time=0, levelist=500).to_numpy(flatten=True) + assert np.allclose(r.values.flatten(), v_ek[:, [9 * 36, 10 * 36, 11 * 36]].flatten()) + assert np.allclose(r.values, vals_ref) + + r = da.loc[:, 0, :, 500, 9 * 36 + 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + @pytest.mark.cache @pytest.mark.parametrize( "kwargs", diff --git a/tests/xr_engine/test_xr_ens.py b/tests/xr_engine/test_xr_ens.py new file mode 100644 index 000000000..319155bf9 --- /dev/null +++ b/tests/xr_engine/test_xr_ens.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import os +import sys + +import pytest + +from earthkit.data import from_source +from earthkit.data.testing import earthkit_remote_test_data_file + +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from xr_engine_fixtures import compare_dims # noqa: E402 + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims", + [ + ( + {}, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"number": "perturbationNumber"}, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"ens": "perturbationNumber"}, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"number": "perturbationNumber"}, + "dim_name_from_role_name": False, + }, + { + "perturbationNumber": [0, 1, 2], + }, + ), + ], +) +def test_xr_number_dim(kwargs, dims): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/ens/ens_cf_pf.grib")) + + ds = ds_ek.to_xarray(**kwargs) + compare_dims(ds, dims, order_ref_var="t") diff --git a/tests/xr_engine/test_xr_level.py b/tests/xr_engine/test_xr_level.py index 2f3f4b91e..6d4260acb 100644 --- a/tests/xr_engine/test_xr_level.py +++ b/tests/xr_engine/test_xr_level.py @@ -28,11 +28,11 @@ "kwargs,dims", [ ( - {"profile": "mars", "level_dim_mode": "level"}, + {"profile": "mars", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, ), ( - {"profile": "mars", "level_dim_mode": "level_and_type"}, + {"profile": "mars", "level_dim_mode": "level_and_type", "dim_name_from_role_name": False}, {"level_and_type": ["1000pl", "300pl", "400pl", "500pl", "700pl", "850pl"]}, ), ], @@ -50,67 +50,117 @@ def test_xr_level_dim(kwargs, dims): [ ( "pl.grib", - {"profile": "grib", "level_dim_mode": "level"}, + {"profile": "grib", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"level": [300, 400, 500, 700, 850, 1000]}, "isobaricInhPa", ), ( "pl_80_Pa.grib2", - {"profile": "grib", 
"level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [80]}, "isobaricInPa", ), ( "hpa_and_pa.grib", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "dim_name_from_role_name": False, + }, {"levelist": [0.01, 0.1, 1]}, "pl", ), ( "hl_1000_m_asl.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [100, 1000, 2000, 3000]}, "heightAboveSea", ), ( "hl_1000_m_agr.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [500, 1000, 2500, 10000]}, "heightAboveGround", ), ( "pt_320_K.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [320]}, "theta", ), ( "pv_1500.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [1500]}, "potentialVorticity", ), ( "soil_7.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [7]}, "depthBelowLand", ), ( "sol_3.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, 
{"level": [3]}, "snowLayer", ), ( "ml_77.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [77]}, "hybrid", ), ( "sfc.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [0]}, "surface", ), @@ -122,49 +172,84 @@ def test_xr_level_dim(kwargs, dims): # ), ( "mean_sea_level_reduced_ll.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [0]}, "meanSea", ), ( "gen_vert_layer.grib", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "dim_name_from_role_name": False, + }, {"level": [1]}, "generalVerticalLayer", ), ( "pl.grib", - {"profile": "mars", "level_dim_mode": "level"}, + {"profile": "mars", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, "pl", ), ( "pl_80_Pa.grib2", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "dim_name_from_role_name": False, + }, {"levelist": [0.8]}, "pl", ), ( "pt_320_K.grib1", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "dim_name_from_role_name": False, + }, {"levelist": [320]}, "pt", ), ( "pv_1500.grib1", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + 
"dim_name_from_role_name": False, + }, {"levelist": [1500]}, "pv", ), ( "sol_3.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "dim_name_from_role_name": False, + }, {"levelist": [3]}, "sol", ), ( "hpa_and_pa.grib", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "dim_name_from_role_name": False, + }, {"levelist": [0.01, 0.1, 1]}, "pl", ), diff --git a/tests/xr_engine/test_xr_remapping.py b/tests/xr_engine/test_xr_remapping.py index 859c51c6c..3011c0df9 100644 --- a/tests/xr_engine/test_xr_remapping.py +++ b/tests/xr_engine/test_xr_remapping.py @@ -38,18 +38,52 @@ def test_xr_remapping_1(): @pytest.mark.cache -def test_xr_remapping_2(): +@pytest.mark.parametrize( + "kwargs,coords,dims", + [ + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + dim_name_from_role_name=False, + ), + {"_k": ["500_pl", "700_pl"]}, + {"forecast_reference_time": 4, "step_timedelta": 2, "_k": 2, "latitude": 19, "longitude": 36}, + ), + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + dim_name_from_role_name=True, + ), + {"level": ["500_pl", "700_pl"]}, + {"forecast_reference_time": 4, "step": 2, "level": 2, "latitude": 19, "longitude": 36}, + ), + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + rename_dims={"level": "_k"}, + dim_name_from_role_name=True, + ), + {"_k": ["500_pl", "700_pl"]}, + {"forecast_reference_time": 4, "step": 2, "_k": 2, "latitude": 19, "longitude": 36}, + ), + ], +) +def test_xr_remapping_2(kwargs, coords, dims): ds0 = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl_small.grib")) - ds = ds0.to_xarray( - dim_roles={"level": 
"_k"}, level_dim_mode="level", remapping={"_k": "{levelist}_{levtype}"} - ) + ds = ds0.to_xarray(**kwargs) data_vars = ["r", "t"] assert [v for v in ds.data_vars] == data_vars - coords = {"_k": ["500_pl", "700_pl"]} + # coords = {"_k": ["500_pl", "700_pl"]} compare_coords(ds, coords) - dims = {"forecast_reference_time": 4, "step": 2, "_k": 2, "latitude": 19, "longitude": 36} + # dims = {"forecast_reference_time": 4, "step_timedelta": 2, "_k": 2, "latitude": 19, "longitude": 36} compare_dims(ds, dims, sizes=True) diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index abd42a4c0..af537cb9a 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -22,7 +22,7 @@ [ ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"]}, + {"time_dim_mode": "raw", "split_dims": ["step"], "dim_name_from_role_name": False}, 2, ["2t", "msl", "r", "t"], ["date", "time", "levelist"], @@ -30,7 +30,12 @@ ), ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step"}, + { + "time_dim_mode": "raw", + "split_dims": ["step"], + "ensure_dims": "step", + "dim_name_from_role_name": False, + }, 2, ["2t", "msl", "r", "t"], ["date", "time", "step", "levelist"], @@ -38,7 +43,11 @@ ), ( ["cds-reanalysis-era5-single-levels-20230101-low-resol.grib"], - {"time_dim_mode": "valid_time", "split_dims": ["stream", "dataType", "edition", "Ni"]}, + { + "time_dim_mode": "valid_time", + "split_dims": ["stream", "dataType", "edition", "Ni"], + "dim_name_from_role_name": False, + }, 11, None, ["valid_time"], @@ -56,6 +65,27 @@ {"stream": "wave", "dataType": "an", "edition": 1, "Ni": 18}, ], ), + ( + ["level", "pl.grib"], + {"time_dim_mode": "raw", "split_dims": ["step"], "dim_name_from_role_name": True}, + 2, + ["2t", "msl", "r", "t"], + ["date", "time", "level"], + [{"step": 0}, {"step": 6}], + ), + ( + ["level", "pl.grib"], + { + "time_dim_mode": "raw", + "split_dims": ["step"], + "ensure_dims": 
"step", + "dim_name_from_role_name": True, + }, + 2, + ["2t", "msl", "r", "t"], + ["date", "time", "step", "level"], + [{"step": 0}, {"step": 6}], + ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist"]), # ({"squeeze": False}, "param", ["r", "t"], ["time", "step", "levelist"]), ], diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index f81dc583e..ff8ebd5b4 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -26,22 +26,37 @@ @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( - {"time_dim_mode": "raw", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, {"date": [20240603, 20240604], "time": [0, 1200], "step": [0, 6]}, + ("step", "hours"), ), ( - {"time_dim_mode": "raw"}, + { + "time_dim_mode": "raw", + "dim_name_from_role_name": True, + }, { "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], "time": [np.timedelta64(0, "s"), np.timedelta64(43200, "s")], "step": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], }, + None, ), ( - {"time_dim_mode": "forecast", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "forecast", + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, { "forecast_reference_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -51,9 +66,13 @@ ], "step": [0, 6], }, + ("step", "hours"), ), ( - {"time_dim_mode": "forecast"}, + { + "time_dim_mode": "forecast", + "dim_name_from_role_name": True, + }, { "forecast_reference_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -63,9 +82,15 @@ ], "step": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], }, + None, ), ( - {"time_dim_mode": "valid_time", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "valid_time", + "decode_times": False, + 
"decode_timedelta": False, + "dim_name_from_role_name": True, + }, { "valid_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -78,9 +103,15 @@ np.datetime64("2024-06-04T18", "ns"), ], }, + None, ), ( - {"time_dim_mode": "valid_time", "decode_times": True, "decode_timedelta": True}, + { + "time_dim_mode": "valid_time", + "decode_times": True, + "decode_timedelta": True, + "dim_name_from_role_name": True, + }, { "valid_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -93,25 +124,54 @@ np.datetime64("2024-06-04T18", "ns"), ], }, + None, + ), + ( + { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": False, + }, + {"date": [20240603, 20240604], "time": [0, 1200], "step_timedelta": [0, 6]}, + ("step_timedelta", "hours"), + ), + ( + { + "time_dim_mode": "raw", + "dim_name_from_role_name": False, + }, + { + "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], + "time": [np.timedelta64(0, "s"), np.timedelta64(43200, "s")], + "step_timedelta": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], + }, + None, ), ], ) -def test_xr_time_basic(kwargs, dims): +def test_xr_time_basic(kwargs, dims, step_units): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( { "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "dim_name_from_role_name": False, }, { "indexing_time": [ @@ -120,12 +180,14 @@ def test_xr_time_basic(kwargs, dims): ], "forecastMonth": [1, 2, 3], }, + ("forecastMonth", "months"), ), ( { "dim_roles": 
{"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "dim_name_from_role_name": False, }, { "indexing_time": [ @@ -134,10 +196,43 @@ def test_xr_time_basic(kwargs, dims): ], "forecastMonth": [1, 2, 3], }, + ("forecastMonth", "months"), + ), + ( + { + "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2014-09-01", "ns"), + np.datetime64("2014-10-01", "ns"), + ], + "step": [1, 2, 3], + }, + ("step", "months"), + ), + ( + { + "dim_roles": {"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2014-09-01", "ns"), + np.datetime64("2014-10-01", "ns"), + ], + "step": [1, 2, 3], + }, + ("step", "months"), ), ], ) -def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): +def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): ds_ek = from_source( "url", earthkit_remote_test_data_file("test-data/xr_engine/date/jma_seasonal_fc_ref_time_per_member.grib"), @@ -146,10 +241,15 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="2t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( { @@ -157,6 +257,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "dim_roles": {"step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -168,6 +269,7 @@ def 
test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ], "forecastMonth": [1, 2, 3, 4, 5, 6], }, + ("forecastMonth", "months"), ), ( { @@ -175,6 +277,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "dim_roles": {"step": "fcmonth"}, "decode_times": False, "decode_timedelta": False, + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -186,6 +289,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ], "fcmonth": [1, 2, 3, 4, 5, 6], }, + ("fcmonth", "months"), ), ( { @@ -194,6 +298,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "decode_times": False, "decode_timedelta": False, "ensure_dims": ["number", "date", "time", "forecastMonth"], + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -206,10 +311,73 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "time": [np.timedelta64(0, "s")], "forecastMonth": [1, 2, 3, 4, 5, 6], }, + ("forecastMonth", "months"), + ), + ( + { + "time_dim_mode": "forecast", + "dim_roles": {"step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + "forecast_reference_time": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), + ), + ( + { + "time_dim_mode": "forecast", + "dim_roles": {"step": "fcmonth"}, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + "forecast_reference_time": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), + ), + ( + { + "time_dim_mode": "raw", + "dim_roles": {"step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + 
"ensure_dims": ["number", "date", "time", "step"], + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + "date": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "time": [np.timedelta64(0, "s")], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), ), ], ) -def test_xr_time_seasonal_monthly_simple(kwargs, dims): +def test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): ds_ek = from_source( "url", earthkit_remote_test_data_file("test-data/xr_engine/date/seasonal_monthly.grib"), @@ -218,32 +386,195 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims): ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="2t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache -def test_xr_valid_time_coord(): +@pytest.mark.parametrize( + "kwargs,dims,step_units,coords", + [ + ( + { + "time_dim_mode": "forecast", + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + "step": [0, 6], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "fixed_dims": ["level", "forecast_reference_time", "step"], + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + "step": [0, 6], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), 
np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "fixed_dims": ["level", "step", "forecast_reference_time"], + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + }, + { + "step": [0, 6], + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T12", "ns")], + [np.datetime64("2024-06-03T06", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "time_dim_mode": "forecast", + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + "dim_name_from_role_name": False, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + "step_timedelta": [0, 6], + }, + ("step_timedelta", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ], +) +def test_xr_valid_time_coord(kwargs, dims, step_units, coords): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl_small.grib")).sel( date=20240603, time=[0, 1200] ) - ds = ds_ek.to_xarray( - time_dim_mode="forecast", add_valid_time_coord=True, decode_times=False, decode_timedelta=False - ) + ds = ds_ek.to_xarray(**kwargs) - dims = { - "forecast_reference_time": [ - np.datetime64("2024-06-03T00", "ns"), - np.datetime64("2024-06-03T12", "ns"), - ], - "step": [0, 6], - } compare_dims(ds, dims, order_ref_var="t") vt = ds.coords["valid_time"] - assert vt.dims == ("forecast_reference_time", "step") + assert vt.dims == tuple(dims.keys()) + + compare_coords(ds, coords) - ref = [ - [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", 
"ns")], - [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], - ] + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims,step_units", + [ + ( + { + "time_dim_mode": "raw", + "dim_name_from_role_name": True, + "ensure_dims": ["date", "time", "step"], + }, + { + "date": [np.datetime64("2011-12-15", "ns")], + "time": [np.timedelta64(12, "h")], + "step": [ + np.timedelta64(12, "h"), + np.timedelta64(18, "h"), + np.timedelta64(24, "h"), + np.timedelta64(30, "h"), + np.timedelta64(36, "h"), + ], + }, + None, + ), + ], +) +def test_xr_time_step_range_1(kwargs, dims, step_units): + ds_ek = from_source( + "url", earthkit_remote_test_data_file("test-data/xr_engine/date/wgust_step_range.grib1") + ) + + ds = ds_ek.to_xarray(**kwargs) + compare_dims(ds, dims, order_ref_var="10fg6") + + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims,step_units", + [ + ( + { + "time_dim_mode": "raw", + "dim_name_from_role_name": True, + "ensure_dims": ["date", "time", "step"], + }, + { + "date": [np.datetime64("2025-05-27", "ns")], + "time": [np.timedelta64(0, "ns")], + "step": [np.timedelta64(72, "h"), np.timedelta64(73, "h")], + }, + None, + ), + ], +) +def test_xr_time_step_range_2(kwargs, dims, step_units): + ds_ek = from_source( + "url", earthkit_remote_test_data_file("test-data/xr_engine/date/lsp_step_range.grib2") + ) + + ds = ds_ek.to_xarray(**kwargs) + compare_dims(ds, dims, order_ref_var="lsp") - compare_coords(ds, {"valid_time": ref}) + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != 
{step_units[1]}" diff --git a/tests/xr_engine/test_xr_write.py b/tests/xr_engine/test_xr_write.py index 118bb8c5e..0154690dc 100644 --- a/tests/xr_engine/test_xr_write.py +++ b/tests/xr_engine/test_xr_write.py @@ -211,6 +211,7 @@ def test_xr_write_seasonal(): ds = ds_ek.to_xarray( time_dim_mode="forecast", dim_roles={"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, + dim_name_from_role_name=False, ) import xarray as xr diff --git a/tests/xr_engine/xr_engine_fixtures.py b/tests/xr_engine/xr_engine_fixtures.py index 713f95ef4..208819f43 100644 --- a/tests/xr_engine/xr_engine_fixtures.py +++ b/tests/xr_engine/xr_engine_fixtures.py @@ -90,7 +90,7 @@ def compare_coord(ds, name, ref_vals, mode="coord"): assert np.allclose(ds.coords[name].values, vals), f"{name=} {ds.coords[name].values} != {vals}" -def compare_dim_order(ds, dims, order_ref_var): +def compare_dim_order(ds, dims, order_ref_var, check_coord=True): if order_ref_var is None: return @@ -98,6 +98,8 @@ def compare_dim_order(ds, dims, order_ref_var): for d in ds[order_ref_var].dims: if d in dims: dim_order.append(d) + if check_coord: + assert d in ds.coords, f"{d} not in {ds.coords}" if isinstance(dims, dict): assert dim_order == list(dims.keys()), f"{dim_order=} != {list(dims.keys())}"