From d7bccef66dea45c65989a200a72817f64f06a8da Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 12 May 2025 11:00:35 +0100 Subject: [PATCH 1/6] Add zarr target --- src/earthkit/data/encoders/zarr.py | 74 ++++++++++++++++++++++++++++++ src/earthkit/data/targets/zarr.py | 50 ++++++++++++++++++++ tests/xr_engine/test_xr_engine.py | 15 +++++- 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 src/earthkit/data/encoders/zarr.py create mode 100644 src/earthkit/data/targets/zarr.py diff --git a/src/earthkit/data/encoders/zarr.py b/src/earthkit/data/encoders/zarr.py new file mode 100644 index 000000000..3f05a8eb5 --- /dev/null +++ b/src/earthkit/data/encoders/zarr.py @@ -0,0 +1,74 @@ +# (C) Copyright 2023 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import logging + +from . import EncodedData +from . 
import Encoder + +LOG = logging.getLogger(__name__) + + +class ZarrEncodedData(EncodedData): + def __init__(self, ds): + self.ds = ds + + def to_bytes(self): + return None + + def to_file(self, f): + return None + + def to_xarray(self): + return self.ds + + def metadata(self, key): + raise NotImplementedError + + +class ZarrEncoder(Encoder): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def encode( + self, + data=None, + **kwargs, + ): + if data is not None: + from earthkit.data.wrappers import get_wrapper + + data = get_wrapper(data) + return data._encode(self, **kwargs) + else: + raise ValueError("No data to encode") + + def _encode( + self, + data=None, + values=None, + min=None, + max=None, + check_nans=False, + metadata={}, + template=None, + # return_bytes=False, + missing_value=9999, + **kwargs, + ): + return ZarrEncodedData(data.to_xarray(add_earthkit_attrs=False)) + + def _encode_field(self, field, **kwargs): + raise NotImplementedError("ZarrEncoder does not support encoding individual fields.") + + def _encode_fieldlist(self, data, **kwargs): + return self._encode(data, **kwargs) + + +encoder = ZarrEncoder diff --git a/src/earthkit/data/targets/zarr.py b/src/earthkit/data/targets/zarr.py new file mode 100644 index 000000000..384f3a4d4 --- /dev/null +++ b/src/earthkit/data/targets/zarr.py @@ -0,0 +1,50 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import logging + +from . 
import SimpleTarget + +LOG = logging.getLogger(__name__) + + +class ZarrTarget(SimpleTarget): + def __init__(self, store, **kwargs): + super().__init__(**kwargs) + self._store = store + self._zarr_kwargs = kwargs + self._encoder = "zarr" + + def close(self): + """Close the target and flush the fdb. + + The target will not be able to write anymore. + + Raises: + ------- + ValueError: If the target is already closed. + """ + pass + + def flush(self): + """Flush the fdb. + + Raises: + ------- + ValueError: If the target is already closed. + """ + pass + + def _write(self, data, **kwargs): + r = self._encode(data, **kwargs) + ds = r.to_xarray() + ds.to_zarr(self._store) + + +target = ZarrTarget diff --git a/tests/xr_engine/test_xr_engine.py b/tests/xr_engine/test_xr_engine.py index 0910f783c..7fbc9f37b 100644 --- a/tests/xr_engine/test_xr_engine.py +++ b/tests/xr_engine/test_xr_engine.py @@ -542,7 +542,7 @@ def test_xr_engine_single_field(): @pytest.mark.cache @pytest.mark.parametrize("add", [False, True]) -def test_xr_engine_add_earthkit_attrs(add): +def test_xr_engine_add_earthkit_attrs_1(add): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) ds_ek = ds_ek[0] @@ -558,3 +558,16 @@ def test_xr_engine_add_earthkit_attrs(add): assert "_earthkit" in ds["t"].attrs else: assert "_earthkit" not in ds["t"].attrs + + +@pytest.mark.cache +def test_xr_engine_add_earthkit_attrs_2(add): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) + ds_ek = ds_ek[0] + + ds = ds_ek.to_xarray( + add_earthkit_attrs=False, + ) + + assert ds + assert "_earthkit" not in ds["t"].attrs From 630bcc92b9479989c008b926fdaa25915d18ef39 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 12 May 2025 11:27:43 +0100 Subject: [PATCH 2/6] Add zarr target --- src/earthkit/data/utils/xarray/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/utils/xarray/builder.py 
b/src/earthkit/data/utils/xarray/builder.py index a303ae8cf..4d0276c82 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -407,7 +407,7 @@ def prepare_tensor(self, ds, dims, name): elif num > 1 or not self.profile.dims.squeeze or d.name in self.profile.dims.ensure_dims: tensor_dims.append(d) tensor_coords[d.key] = vals[d.key] - if d.key in component_vals: + if component_vals and d.key in component_vals: tensor_coords_component[d.key] = component_vals[d.key] # check if the dims/coords are consistent with the tensors of From b4af907279eefc816a07dc53edc4a9bf0c46a3b1 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 13 May 2025 09:16:38 +0100 Subject: [PATCH 3/6] Add zarr target --- src/earthkit/data/encoders/zarr.py | 8 +++++++- src/earthkit/data/targets/zarr.py | 9 +++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/earthkit/data/encoders/zarr.py b/src/earthkit/data/encoders/zarr.py index 3f05a8eb5..2e9b05085 100644 --- a/src/earthkit/data/encoders/zarr.py +++ b/src/earthkit/data/encoders/zarr.py @@ -68,7 +68,13 @@ def _encode_field(self, field, **kwargs): raise NotImplementedError("ZarrEncoder does not support encoding individual fields.") def _encode_fieldlist(self, data, **kwargs): - return self._encode(data, **kwargs) + earthkit_to_xarray_kwargs = kwargs.pop("earthkit_to_xarray_kwargs", {}) + # earthkit_to_xarray_kwargs.update(kwargs) + earthkit_to_xarray_kwargs["add_earthkit_attrs"] = False + kwargs = earthkit_to_xarray_kwargs + + ds = data.to_xarray(**kwargs) + return ZarrEncodedData(ds) encoder = ZarrEncoder diff --git a/src/earthkit/data/targets/zarr.py b/src/earthkit/data/targets/zarr.py index 384f3a4d4..9bb4fdd6f 100644 --- a/src/earthkit/data/targets/zarr.py +++ b/src/earthkit/data/targets/zarr.py @@ -15,10 +15,11 @@ class ZarrTarget(SimpleTarget): - def __init__(self, store, **kwargs): + def __init__(self, **kwargs): super().__init__(**kwargs) - self._store = store 
self._zarr_kwargs = kwargs + self._ekd_kwargs = kwargs.pop("earthkit_to_xarray_kwargs", {}) + self._xr_kwargs = kwargs.pop("xarray_to_zarr_kwargs", {}) self._encoder = "zarr" def close(self): @@ -42,9 +43,9 @@ def flush(self): pass def _write(self, data, **kwargs): - r = self._encode(data, **kwargs) + r = self._encode(data, earthkit_to_xarray_kwargs=self._ekd_kwargs) ds = r.to_xarray() - ds.to_zarr(self._store) + ds.to_zarr(**self._xr_kwargs) target = ZarrTarget From 5f3f792541c07842ec47dd7b867bca85256998b5 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 29 May 2025 13:46:45 +0100 Subject: [PATCH 4/6] Add zarr target --- docs/examples/grib_to_file_target.ipynb | 6 +- docs/examples/grib_to_zarr_target.ipynb | 822 ++++++ docs/examples/index.rst | 1 + docs/examples/list_of_dicts_to_xarray.ipynb | 1930 +++++++------- docs/examples/xarray_engine_chunks.ipynb | 2484 +++++++++---------- docs/guide/targets/index.rst | 1 + docs/guide/targets/to_target.rst | 23 + 7 files changed, 3057 insertions(+), 2210 deletions(-) create mode 100644 docs/examples/grib_to_zarr_target.ipynb diff --git a/docs/examples/grib_to_file_target.ipynb b/docs/examples/grib_to_file_target.ipynb index c37ffa05f..97bcf1dad 100644 --- a/docs/examples/grib_to_file_target.ipynb +++ b/docs/examples/grib_to_file_target.ipynb @@ -442,9 +442,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -456,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/grib_to_zarr_target.ipynb b/docs/examples/grib_to_zarr_target.ipynb new file mode 100644 index 000000000..91a8aa546 --- /dev/null +++ b/docs/examples/grib_to_zarr_target.ipynb @@ -0,0 +1,822 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": 
"8e308cd3-7f5a-4b62-bd2d-027850282c00", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Writing GRIB data to zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "62b00621-67cd-46b0-81ef-16278a6eee18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "102f8028287b46f9be4682e5d04cbfd8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00\n", + "#T_62fe6 th {\n", + " text-align: left;\n", + "}\n", + "#T_62fe6_row0_col0, #T_62fe6_row0_col1, #T_62fe6_row0_col2, #T_62fe6_row0_col3, #T_62fe6_row0_col4, #T_62fe6_row0_col5, #T_62fe6_row0_col6, #T_62fe6_row0_col7, #T_62fe6_row0_col8, #T_62fe6_row1_col0, #T_62fe6_row1_col1, #T_62fe6_row1_col2, #T_62fe6_row1_col3, #T_62fe6_row1_col4, #T_62fe6_row1_col5, #T_62fe6_row1_col6, #T_62fe6_row1_col7, #T_62fe6_row1_col8 {\n", + " text-align: left;\n", + "}\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  leveldatetimestepparamIdclassstreamtypeexperimentVersionNumber
shortNametypeOfLevel         
risobaricInhPa700,50020240603,202406040,12000,6157odoperfc0001
tisobaricInhPa700,50020240603,202406040,12000,6130odoperfc0001
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "2ab4d979-7c02-42e4-9b52-8ec12e76853b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using to_target() on the data object" + ] + }, + { + "cell_type": "raw", + "id": "60ee891e-f0cd-426c-8f26-8155e9c25381", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We use :func:`to_target` to write the GRIB fieldlist/field into a zarr store. First, the data is converted to Xarray then :py:func:`xarray.Dataset.to_zarr` is called to generate the zarr store. We need to set the kwargs accordingly." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e3ff25d0-bce4-4cfc-bdd0-93ca0864d08d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniforge/base/envs/dev/lib/python3.11/site-packages/zarr/api/asynchronous.py:205: UserWarning: Consolidated metadata is currently not part in the Zarr format 3 specification. 
It may not be supported by other zarr implementations and may change in the future.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# with these options each field will be a separate chunk\n", + "ds.to_target(\"zarr\", \n", + " earthkit_to_xarray_kwargs={\"chunks\": {\"forecast_reference_time\": 1, \n", + " \"step\": 1, \n", + " \"levelist\": 1}},\n", + " xarray_to_zarr_kwargs={\"store\": \"_pl.zarr\", \"mode\": \"w\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3ffafb60-c412-4560-9bea-089143bcf85d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
/\n",
+       "├── forecast_reference_time (4,) int64\n",
+       "├── latitude (19,) float64\n",
+       "├── levelist (2,) int64\n",
+       "├── longitude (36,) float64\n",
+       "├── r (4, 2, 2, 19, 36) float64\n",
+       "├── step (2,) int64\n",
+       "└── t (4, 2, 2, 19, 36) float64\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m/\u001b[0m\n", + "├── \u001b[1mforecast_reference_time\u001b[0m (4,) int64\n", + "├── \u001b[1mlatitude\u001b[0m (19,) float64\n", + "├── \u001b[1mlevelist\u001b[0m (2,) int64\n", + "├── \u001b[1mlongitude\u001b[0m (36,) float64\n", + "├── \u001b[1mr\u001b[0m (4, 2, 2, 19, 36) float64\n", + "├── \u001b[1mstep\u001b[0m (2,) int64\n", + "└── \u001b[1mt\u001b[0m (4, 2, 2, 19, 36) float64\n" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import zarr \n", + "root = zarr.group(\"_pl.zarr\")\n", + "root.tree()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "515bc071-d45f-48aa-abea-0cc688f4eebc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Type : Array\n", + "Zarr format : 3\n", + "Data type : DataType.float64\n", + "Shape : (4, 2, 2, 19, 36)\n", + "Chunk shape : (1, 1, 1, 19, 36)\n", + "Order : C\n", + "Read-only : False\n", + "Store type : LocalStore\n", + "Filters : ()\n", + "Serializer : BytesCodec(endian=)\n", + "Compressors : (ZstdCodec(level=0, checksum=False),)\n", + "No. bytes : 87552 (85.5K)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root[\"t\"].info" + ] + }, + { + "cell_type": "markdown", + "id": "ef19bc33-fcc7-4b5a-83b0-ee59da4179f0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The zarr store can be loaded to Xarray to check its content." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "706d4467-64b8-46bf-894d-346088208fa2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/ipykernel_35742/754541422.py:2: FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.\n", + " xarray.open_dataset(\"_pl.zarr\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (levelist: 2, forecast_reference_time: 4, step: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * levelist                 (levelist) int64 16B 500 700\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    class:        od\n",
+       "    stream:       oper\n",
+       "    levtype:      pl\n",
+       "    type:         fc\n",
+       "    expver:       0001\n",
+       "    date:         20240603\n",
+       "    time:         0\n",
+       "    domain:       g\n",
+       "    number:       0\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (levelist: 2, forecast_reference_time: 4, step: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 700\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + "Data variables:\n", + " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " class: od\n", + " stream: oper\n", + " levtype: pl\n", + " type: fc\n", + " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " number: 0\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import xarray\n", + "xarray.open_dataset(\"_pl.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8479a12e-e907-43de-a3a6-aefb8cbfa754", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/index.rst b/docs/examples/index.rst index f9f5af4a9..05fd8a143 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -178,6 +178,7 @@ Targets and encoders 
grib_to_file_pattern_target.ipynb grib_to_fdb_target.ipynb grib_to_geotiff.ipynb + grib_to_zarr_target.ipynb grib_encoder.ipynb Miscellaneous diff --git a/docs/examples/list_of_dicts_to_xarray.ipynb b/docs/examples/list_of_dicts_to_xarray.ipynb index 955d60420..322f3097c 100644 --- a/docs/examples/list_of_dicts_to_xarray.ipynb +++ b/docs/examples/list_of_dicts_to_xarray.ipynb @@ -1,968 +1,968 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "ee0f0104-8077-45f1-9746-58f29b64db92", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "ee0f0104-8077-45f1-9746-58f29b64db92", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## list-of-dict: converting to Xarray" + ] + }, + { + "cell_type": "raw", + "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." + ] + }, + { + "cell_type": "markdown", + "id": "2e087423-8c96-49b4-984c-f15472fa8381", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data containing geography" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 248B\n",
+                            "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
+                            "Coordinates:\n",
+                            "  * levelist   (levelist) int64 16B 500 850\n",
+                            "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
+                            "  * longitude  (longitude) float64 16B 20.0 40.0\n",
+                            "Data variables:\n",
+                            "    t          (levelist, latitude, longitude) float64 96B ...\n",
+                            "    u          (levelist, latitude, longitude) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 248B\n", + "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", + " * longitude (longitude) float64 16B 20.0 40.0\n", + "Data variables:\n", + " t (levelist, latitude, longitude) float64 96B ...\n", + " u (levelist, latitude, longitude) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "\n", + "prototype = {\n", + " \"latitudes\": [10.0, 0.0, -10.0],\n", + " \"longitudes\": [20, 40.0],\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data without geography" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+                            "Dimensions:   (levelist: 2, values: 6)\n",
+                            "Coordinates:\n",
+                            "  * levelist  (levelist) int64 16B 500 850\n",
+                            "Dimensions without coordinates: values\n",
+                            "Data variables:\n",
+                            "    t         (levelist, values) float64 96B ...\n",
+                            "    u         (levelist, values) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (levelist: 2, values: 6)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + "Dimensions without coordinates: values\n", + "Data variables:\n", + " t (levelist, values) float64 96B ...\n", + " u (levelist, values) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prototype = {\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6463409-7686-4d90-8cab-00a04b7119bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## list-of-dict: converting to Xarray" - ] - }, - { - "cell_type": "raw", - "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", - "metadata": { - "editable": true, - "raw_mimetype": "text/x-rst", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." 
- ] - }, - { - "cell_type": "markdown", - "id": "2e087423-8c96-49b4-984c-f15472fa8381", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data containing geography" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 248B\n",
-       "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
-       "Coordinates:\n",
-       "  * levelist   (levelist) int64 16B 500 850\n",
-       "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
-       "  * longitude  (longitude) float64 16B 20.0 40.0\n",
-       "Data variables:\n",
-       "    t          (levelist, latitude, longitude) float64 96B ...\n",
-       "    u          (levelist, latitude, longitude) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 248B\n", - "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", - " * longitude (longitude) float64 16B 20.0 40.0\n", - "Data variables:\n", - " t (levelist, latitude, longitude) float64 96B ...\n", - " u (levelist, latitude, longitude) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import earthkit.data as ekd\n", - "\n", - "prototype = {\n", - " \"latitudes\": [10.0, 0.0, -10.0],\n", - " \"longitudes\": [20, 40.0],\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "markdown", - "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data without geography" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 208B\n",
-       "Dimensions:   (levelist: 2, values: 6)\n",
-       "Coordinates:\n",
-       "  * levelist  (levelist) int64 16B 500 850\n",
-       "Dimensions without coordinates: values\n",
-       "Data variables:\n",
-       "    t         (levelist, values) float64 96B ...\n",
-       "    u         (levelist, values) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 208B\n", - "Dimensions: (levelist: 2, values: 6)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - "Dimensions without coordinates: values\n", - "Data variables:\n", - " t (levelist, values) float64 96B ...\n", - " u (levelist, values) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "prototype = {\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6463409-7686-4d90-8cab-00a04b7119bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/xarray_engine_chunks.ipynb b/docs/examples/xarray_engine_chunks.ipynb index 39eb8f6f9..0d785fb51 100644 --- a/docs/examples/xarray_engine_chunks.ipynb +++ b/docs/examples/xarray_engine_chunks.ipynb @@ -1,1245 +1,1245 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "f3568669-9884-491d-8597-5130ad273337", - 
"metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "f3568669-9884-491d-8597-5130ad273337", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: chunks" + ] + }, + { + "cell_type": "raw", + "id": "b42eccf8-abcc-44a1-8406-f8aa966b1bf5", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This notebook demonstrates how to use chunking in computations when a GRIB fieldlist is converted to to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. Chunking can be used to handle data that does not fit into memory." + ] + }, + { + "cell_type": "markdown", + "id": "8b1ceb8a-967d-4324-9af3-3b6eec468da1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "First, we get 2m temperature data for a whole year on a low resolution regular latitude-longitude grid. It contains 2 fields per day (at 0 and 12 UTC). This data obviously fit into memory, so only used for demonstration purposes." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a4f7dd0-f443-4cda-8725-cd61927d1409", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "98299fdfafa74aa5b8cbc0f95188b8d5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "t2_1_year_hourly.grib: 0%| | 0.00/429k [00:00\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray '2t' (valid_time: 732, latitude: 13, longitude: 24)> Size: 2MB\n",
+                            "dask.array<open_dataset-2t, shape=(732, 13, 24), dtype=float64, chunksize=(10, 13, 24), chunktype=numpy.ndarray>\n",
+                            "Coordinates:\n",
+                            "  * valid_time  (valid_time) datetime64[ns] 6kB 2020-01-01 ... 2020-12-31T06:...\n",
+                            "  * latitude    (latitude) float64 104B 90.0 75.0 60.0 ... -60.0 -75.0 -90.0\n",
+                            "  * longitude   (longitude) float64 192B 0.0 15.0 30.0 ... 315.0 330.0 345.0\n",
+                            "Attributes:\n",
+                            "    standard_name:  air_temperature\n",
+                            "    long_name:      2 metre temperature\n",
+                            "    units:          K
" + ], + "text/plain": [ + " Size: 2MB\n", + "dask.array\n", + "Coordinates:\n", + " * valid_time (valid_time) datetime64[ns] 6kB 2020-01-01 ... 2020-12-31T06:...\n", + " * latitude (latitude) float64 104B 90.0 75.0 60.0 ... -60.0 -75.0 -90.0\n", + " * longitude (longitude) float64 192B 0.0 15.0 30.0 ... 315.0 330.0 345.0\n", + "Attributes:\n", + " standard_name: air_temperature\n", + " long_name: 2 metre temperature\n", + " units: K" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"valid_time\", \n", + " chunks={\"valid_time\": 10}, \n", + " add_earthkit_attrs=False)\n", + "ds[\"2t\"]" + ] + }, + { + "cell_type": "markdown", + "id": "d5caa260-5e6c-432b-96b7-ea84cb261432", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "46e0abd9-7866-4e9f-9c89-c9234b372bc2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray '2t' (latitude: 13, longitude: 24)> Size: 2kB\n",
+                            "array([[259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
+                            "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
+                            "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
+                            "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
+                            "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
+                            "        259.17798273, 259.17798273, 259.17798273, 259.17798273],\n",
+                            "       [273.2611026 , 275.61228088, 275.48984236, 274.29307835,\n",
+                            "        268.16812105, 267.89195131, 264.09208792, 262.4144496 ,\n",
+                            "        262.67648853, 261.67375629, 261.81749775, 261.75990725,\n",
+                            "        261.65672248, 261.12205718, 260.31177713, 259.69160124,\n",
+                            "        259.44480308, 258.91397999, 256.69544345, 261.1351634 ,\n",
+                            "        263.80255581, 245.3709899 , 246.22366237, 263.91035124],\n",
+                            "       [281.80054932, 277.3069957 , 278.84242945, 276.02408075,\n",
+                            "        274.49351381, 274.21627678, 274.1331996 , 272.61215281,\n",
+                            "        271.49176346, 269.63533416, 273.44181469, 275.6214595 ,\n",
+                            "        276.75602234, 275.00730308, 276.87611285, 273.58944106,\n",
+                            "        271.92337207, 268.99705718, 266.13354113, 265.23450595,\n",
+                            "        271.60276073, 273.63473648, 279.19937105, 281.8119052 ],\n",
+                            "       [284.15830206, 283.85715793, 286.20103601, 283.92187788,\n",
+                            "        283.76810397, 284.21051346, 282.39472624, 279.20961695,\n",
+                            "...\n",
+                            "        283.64451278, 283.35801176, 282.91684031, 282.9554759 ,\n",
+                            "        282.14711695, 282.26140144, 281.0409011 , 280.42200595],\n",
+                            "       [269.24513607, 270.23454864, 271.63341305, 271.82437105,\n",
+                            "        271.60942057, 270.65992432, 270.95410978, 271.93656367,\n",
+                            "        273.56741237, 274.12688129, 273.18558177, 275.06365554,\n",
+                            "        275.63789564, 274.6907901 , 272.28731504, 273.95432323,\n",
+                            "        275.29179762, 275.50107016, 275.76251141, 276.15411831,\n",
+                            "        273.34336866, 269.42683006, 268.963758  , 268.6785804 ],\n",
+                            "       [234.73941544, 228.72784611, 229.33594038, 225.86723307,\n",
+                            "        238.04060226, 241.2718608 , 229.15774707, 226.06100868,\n",
+                            "        224.74338573, 228.59588744, 232.7254554 , 258.2240039 ,\n",
+                            "        257.83465964, 258.4833106 , 262.08999605, 259.26575595,\n",
+                            "        256.58991066, 260.79205376, 251.32129728, 250.12530172,\n",
+                            "        253.13652952, 256.13648682, 258.81438129, 254.06318594],\n",
+                            "       [227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
+                            "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
+                            "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
+                            "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
+                            "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
+                            "        227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n",
+                            "Coordinates:\n",
+                            "  * latitude   (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n",
+                            "  * longitude  (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0
" + ], + "text/plain": [ + " Size: 2kB\n", + "array([[259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273],\n", + " [273.2611026 , 275.61228088, 275.48984236, 274.29307835,\n", + " 268.16812105, 267.89195131, 264.09208792, 262.4144496 ,\n", + " 262.67648853, 261.67375629, 261.81749775, 261.75990725,\n", + " 261.65672248, 261.12205718, 260.31177713, 259.69160124,\n", + " 259.44480308, 258.91397999, 256.69544345, 261.1351634 ,\n", + " 263.80255581, 245.3709899 , 246.22366237, 263.91035124],\n", + " [281.80054932, 277.3069957 , 278.84242945, 276.02408075,\n", + " 274.49351381, 274.21627678, 274.1331996 , 272.61215281,\n", + " 271.49176346, 269.63533416, 273.44181469, 275.6214595 ,\n", + " 276.75602234, 275.00730308, 276.87611285, 273.58944106,\n", + " 271.92337207, 268.99705718, 266.13354113, 265.23450595,\n", + " 271.60276073, 273.63473648, 279.19937105, 281.8119052 ],\n", + " [284.15830206, 283.85715793, 286.20103601, 283.92187788,\n", + " 283.76810397, 284.21051346, 282.39472624, 279.20961695,\n", + "...\n", + " 283.64451278, 283.35801176, 282.91684031, 282.9554759 ,\n", + " 282.14711695, 282.26140144, 281.0409011 , 280.42200595],\n", + " [269.24513607, 270.23454864, 271.63341305, 271.82437105,\n", + " 271.60942057, 270.65992432, 270.95410978, 271.93656367,\n", + " 273.56741237, 274.12688129, 273.18558177, 275.06365554,\n", + " 275.63789564, 274.6907901 , 272.28731504, 273.95432323,\n", + " 275.29179762, 275.50107016, 275.76251141, 276.15411831,\n", + " 273.34336866, 269.42683006, 268.963758 , 268.6785804 ],\n", + " [234.73941544, 228.72784611, 229.33594038, 225.86723307,\n", + " 238.04060226, 241.2718608 , 229.15774707, 226.06100868,\n", + " 
224.74338573, 228.59588744, 232.7254554 , 258.2240039 ,\n", + " 257.83465964, 258.4833106 , 262.08999605, 259.26575595,\n", + " 256.58991066, 260.79205376, 251.32129728, 250.12530172,\n", + " 253.13652952, 256.13648682, 258.81438129, 254.06318594],\n", + " [227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n", + "Coordinates:\n", + " * latitude (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n", + " * longitude (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m = ds[\"2t\"].mean(dim=\"valid_time\").load()\n", + "m" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb9d85ea-a52f-4dc6-b081-7688f9c90536", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## Xarray engine: chunks" - ] - }, - { - "cell_type": "raw", - "id": "b42eccf8-abcc-44a1-8406-f8aa966b1bf5", - "metadata": { - "editable": true, - "raw_mimetype": "text/x-rst", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "This notebook demonstrates how to use chunking in computations when a GRIB fieldlist is converted to to 
Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. Chunking can be used to handle data that does not fit into memory." - ] - }, - { - "cell_type": "markdown", - "id": "8b1ceb8a-967d-4324-9af3-3b6eec468da1", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "First, we get 2m temperature data for a whole year on a low resolution regular latitude-longitude grid. It contains 2 fields per day (at 0 and 12 UTC). This data obviously fit into memory, so only used for demonstration purposes." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3a4f7dd0-f443-4cda-8725-cd61927d1409", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "98299fdfafa74aa5b8cbc0f95188b8d5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "t2_1_year_hourly.grib: 0%| | 0.00/429k [00:00\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray '2t' (valid_time: 732, latitude: 13, longitude: 24)> Size: 2MB\n",
-       "dask.array<open_dataset-2t, shape=(732, 13, 24), dtype=float64, chunksize=(10, 13, 24), chunktype=numpy.ndarray>\n",
-       "Coordinates:\n",
-       "  * valid_time  (valid_time) datetime64[ns] 6kB 2020-01-01 ... 2020-12-31T06:...\n",
-       "  * latitude    (latitude) float64 104B 90.0 75.0 60.0 ... -60.0 -75.0 -90.0\n",
-       "  * longitude   (longitude) float64 192B 0.0 15.0 30.0 ... 315.0 330.0 345.0\n",
-       "Attributes:\n",
-       "    standard_name:  air_temperature\n",
-       "    long_name:      2 metre temperature\n",
-       "    units:          K
" - ], - "text/plain": [ - " Size: 2MB\n", - "dask.array\n", - "Coordinates:\n", - " * valid_time (valid_time) datetime64[ns] 6kB 2020-01-01 ... 2020-12-31T06:...\n", - " * latitude (latitude) float64 104B 90.0 75.0 60.0 ... -60.0 -75.0 -90.0\n", - " * longitude (longitude) float64 192B 0.0 15.0 30.0 ... 315.0 330.0 345.0\n", - "Attributes:\n", - " standard_name: air_temperature\n", - " long_name: 2 metre temperature\n", - " units: K" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(time_dim_mode=\"valid_time\", \n", - " chunks={\"valid_time\": 10}, \n", - " add_earthkit_attrs=False)\n", - "ds[\"2t\"]" - ] - }, - { - "cell_type": "markdown", - "id": "d5caa260-5e6c-432b-96b7-ea84cb261432", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "We compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "46e0abd9-7866-4e9f-9c89-c9234b372bc2", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray '2t' (latitude: 13, longitude: 24)> Size: 2kB\n",
-       "array([[259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
-       "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
-       "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
-       "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
-       "        259.17798273, 259.17798273, 259.17798273, 259.17798273,\n",
-       "        259.17798273, 259.17798273, 259.17798273, 259.17798273],\n",
-       "       [273.2611026 , 275.61228088, 275.48984236, 274.29307835,\n",
-       "        268.16812105, 267.89195131, 264.09208792, 262.4144496 ,\n",
-       "        262.67648853, 261.67375629, 261.81749775, 261.75990725,\n",
-       "        261.65672248, 261.12205718, 260.31177713, 259.69160124,\n",
-       "        259.44480308, 258.91397999, 256.69544345, 261.1351634 ,\n",
-       "        263.80255581, 245.3709899 , 246.22366237, 263.91035124],\n",
-       "       [281.80054932, 277.3069957 , 278.84242945, 276.02408075,\n",
-       "        274.49351381, 274.21627678, 274.1331996 , 272.61215281,\n",
-       "        271.49176346, 269.63533416, 273.44181469, 275.6214595 ,\n",
-       "        276.75602234, 275.00730308, 276.87611285, 273.58944106,\n",
-       "        271.92337207, 268.99705718, 266.13354113, 265.23450595,\n",
-       "        271.60276073, 273.63473648, 279.19937105, 281.8119052 ],\n",
-       "       [284.15830206, 283.85715793, 286.20103601, 283.92187788,\n",
-       "        283.76810397, 284.21051346, 282.39472624, 279.20961695,\n",
-       "...\n",
-       "        283.64451278, 283.35801176, 282.91684031, 282.9554759 ,\n",
-       "        282.14711695, 282.26140144, 281.0409011 , 280.42200595],\n",
-       "       [269.24513607, 270.23454864, 271.63341305, 271.82437105,\n",
-       "        271.60942057, 270.65992432, 270.95410978, 271.93656367,\n",
-       "        273.56741237, 274.12688129, 273.18558177, 275.06365554,\n",
-       "        275.63789564, 274.6907901 , 272.28731504, 273.95432323,\n",
-       "        275.29179762, 275.50107016, 275.76251141, 276.15411831,\n",
-       "        273.34336866, 269.42683006, 268.963758  , 268.6785804 ],\n",
-       "       [234.73941544, 228.72784611, 229.33594038, 225.86723307,\n",
-       "        238.04060226, 241.2718608 , 229.15774707, 226.06100868,\n",
-       "        224.74338573, 228.59588744, 232.7254554 , 258.2240039 ,\n",
-       "        257.83465964, 258.4833106 , 262.08999605, 259.26575595,\n",
-       "        256.58991066, 260.79205376, 251.32129728, 250.12530172,\n",
-       "        253.13652952, 256.13648682, 258.81438129, 254.06318594],\n",
-       "       [227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
-       "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
-       "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
-       "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
-       "        227.70048102, 227.70048102, 227.70048102, 227.70048102,\n",
-       "        227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n",
-       "Coordinates:\n",
-       "  * latitude   (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n",
-       "  * longitude  (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0
" - ], - "text/plain": [ - " Size: 2kB\n", - "array([[259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", - " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", - " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", - " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", - " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", - " 259.17798273, 259.17798273, 259.17798273, 259.17798273],\n", - " [273.2611026 , 275.61228088, 275.48984236, 274.29307835,\n", - " 268.16812105, 267.89195131, 264.09208792, 262.4144496 ,\n", - " 262.67648853, 261.67375629, 261.81749775, 261.75990725,\n", - " 261.65672248, 261.12205718, 260.31177713, 259.69160124,\n", - " 259.44480308, 258.91397999, 256.69544345, 261.1351634 ,\n", - " 263.80255581, 245.3709899 , 246.22366237, 263.91035124],\n", - " [281.80054932, 277.3069957 , 278.84242945, 276.02408075,\n", - " 274.49351381, 274.21627678, 274.1331996 , 272.61215281,\n", - " 271.49176346, 269.63533416, 273.44181469, 275.6214595 ,\n", - " 276.75602234, 275.00730308, 276.87611285, 273.58944106,\n", - " 271.92337207, 268.99705718, 266.13354113, 265.23450595,\n", - " 271.60276073, 273.63473648, 279.19937105, 281.8119052 ],\n", - " [284.15830206, 283.85715793, 286.20103601, 283.92187788,\n", - " 283.76810397, 284.21051346, 282.39472624, 279.20961695,\n", - "...\n", - " 283.64451278, 283.35801176, 282.91684031, 282.9554759 ,\n", - " 282.14711695, 282.26140144, 281.0409011 , 280.42200595],\n", - " [269.24513607, 270.23454864, 271.63341305, 271.82437105,\n", - " 271.60942057, 270.65992432, 270.95410978, 271.93656367,\n", - " 273.56741237, 274.12688129, 273.18558177, 275.06365554,\n", - " 275.63789564, 274.6907901 , 272.28731504, 273.95432323,\n", - " 275.29179762, 275.50107016, 275.76251141, 276.15411831,\n", - " 273.34336866, 269.42683006, 268.963758 , 268.6785804 ],\n", - " [234.73941544, 228.72784611, 229.33594038, 225.86723307,\n", - " 238.04060226, 241.2718608 , 229.15774707, 226.06100868,\n", - " 
224.74338573, 228.59588744, 232.7254554 , 258.2240039 ,\n", - " 257.83465964, 258.4833106 , 262.08999605, 259.26575595,\n", - " 256.58991066, 260.79205376, 251.32129728, 250.12530172,\n", - " 253.13652952, 256.13648682, 258.81438129, 254.06318594],\n", - " [227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", - " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", - " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", - " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", - " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", - " 227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n", - "Coordinates:\n", - " * latitude (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n", - " * longitude (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m = ds[\"2t\"].mean(dim=\"valid_time\").load()\n", - "m" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eb9d85ea-a52f-4dc6-b081-7688f9c90536", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/guide/targets/index.rst b/docs/guide/targets/index.rst index 2611967ff..2efcb80f2 100644 --- a/docs/guide/targets/index.rst +++ b/docs/guide/targets/index.rst @@ -23,6 +23,7 @@ Examples - :ref:`/examples/grib_to_file_pattern_target.ipynb` - :ref:`/examples/grib_to_fdb_target.ipynb` - 
:ref:`/examples/grib_to_geotiff.ipynb` + - :ref:`/examples/grib_to_zarr_target.ipynb` Overview diff --git a/docs/guide/targets/to_target.rst b/docs/guide/targets/to_target.rst index f39d4d33d..8c943527f 100644 --- a/docs/guide/targets/to_target.rst +++ b/docs/guide/targets/to_target.rst @@ -39,6 +39,10 @@ Built in targets * - :ref:`targets-fdb` - add data to a `Fields DataBase `_ (FDB) - :py:class:`~data.targets.FDBTarget` + * - :ref:`targets-zarr` + - add data to a `zarr `_ store + - :py:class:`~data.targets.ZarrTarget` + .. _targets-file: @@ -170,6 +174,25 @@ fdb - :ref:`/examples/grib_to_fdb_target.ipynb` +.. _targets-zarr: + +zarr +---- + +.. py:function:: to_target("zarr", earthkit_to_xarray_kwargs=None, xarray_to_zarr_kwargs=None, data=None) + :noindex: + + The ``zarr`` target writes to a `zarr `_ store. + + :param dict earthkit_to_xarray_kwargs: the keyword arguments passed to the :func:`to_xarray` function. If not provided, the default values are used. + :param dict xarray_to_zarr_kwargs: the keyword arguments passed to the :py:func:`xarray.Dataset.to_zarr` function. As a bare minimum, the ``store`` keyword argument must be provided. + :param data: specify the data to write. Cannot be set when :func:`to_target` is called on a data object. + + This target converts the data to an :ref:`xarray.Dataset ` and then writes it to a zarr store using the :py:func:`xarray.Dataset.to_zarr` function. The conversion to an xarray dataset is done by the :func:`to_xarray` function. + + Notebook examples: + + - :ref:`/examples/grib_to_zarr_target.ipynb` .. .. 
_data-targets-multio: From c898778c6be8ce5a9d6952d35d1cab09ffb36dd3 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 15:55:21 +0100 Subject: [PATCH 5/6] Add tests --- docs/examples/grib_to_zarr_target.ipynb | 124 ++++++++++++------------ src/earthkit/data/testing.py | 7 ++ tests/sources/test_zarr.py | 17 ++-- tests/targets/test_target_zarr.py | 65 +++++++++++++ 4 files changed, 145 insertions(+), 68 deletions(-) create mode 100644 tests/targets/test_target_zarr.py diff --git a/docs/examples/grib_to_zarr_target.ipynb b/docs/examples/grib_to_zarr_target.ipynb index 91a8aa546..1df45a6fc 100644 --- a/docs/examples/grib_to_zarr_target.ipynb +++ b/docs/examples/grib_to_zarr_target.ipynb @@ -11,7 +11,7 @@ "tags": [] }, "source": [ - "## Writing GRIB data to zarr" + "## Writing GRIB data to Zarr" ] }, { @@ -29,7 +29,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "102f8028287b46f9be4682e5d04cbfd8", + "model_id": "8d75e667605a4e4eb22967f6a3d6e9c6", "version_major": 2, "version_minor": 0 }, @@ -78,27 +78,27 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -116,36 +116,36 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
  leveldatetimestepparamIdclassstreamtypeexperimentVersionNumberleveldatetimestepparamIdclassstreamtypeexperimentVersionNumber
shortName
risobaricInhPa700,50020240603,202406040,12000,6157odoperfc0001risobaricInhPa700,50020240603,202406040,12000,6157odoperfc0001
tisobaricInhPa700,50020240603,202406040,12000,6130odoperfc0001tisobaricInhPa700,50020240603,202406040,12000,6130odoperfc0001
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -212,7 +212,7 @@ "ds.to_target(\"zarr\", \n", " earthkit_to_xarray_kwargs={\"chunks\": {\"forecast_reference_time\": 1, \n", " \"step\": 1, \n", - " \"levelist\": 1}},\n", + " \"level\": 1}},\n", " xarray_to_zarr_kwargs={\"store\": \"_pl.zarr\", \"mode\": \"w\"})" ] }, @@ -234,7 +234,7 @@ "
/\n",
        "├── forecast_reference_time (4,) int64\n",
        "├── latitude (19,) float64\n",
-       "├── levelist (2,) int64\n",
+       "├── level (2,) int64\n",
        "├── longitude (36,) float64\n",
        "├── r (4, 2, 2, 19, 36) float64\n",
        "├── step (2,) int64\n",
@@ -245,7 +245,7 @@
        "\u001b[1m/\u001b[0m\n",
        "├── \u001b[1mforecast_reference_time\u001b[0m (4,) int64\n",
        "├── \u001b[1mlatitude\u001b[0m (19,) float64\n",
-       "├── \u001b[1mlevelist\u001b[0m (2,) int64\n",
+       "├── \u001b[1mlevel\u001b[0m (2,) int64\n",
        "├── \u001b[1mlongitude\u001b[0m (36,) float64\n",
        "├── \u001b[1mr\u001b[0m (4, 2, 2, 19, 36) float64\n",
        "├── \u001b[1mstep\u001b[0m (2,) int64\n",
@@ -331,7 +331,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/ipykernel_35742/754541422.py:2: FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.\n",
+      "/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/ipykernel_45349/754541422.py:2: FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.\n",
       "  xarray.open_dataset(\"_pl.zarr\")\n"
      ]
     },
@@ -709,17 +709,17 @@
        "  fill: currentColor;\n",
        "}\n",
        "
<xarray.Dataset> Size: 176kB\n",
-       "Dimensions:                  (levelist: 2, forecast_reference_time: 4, step: 2,\n",
-       "                              latitude: 19, longitude: 36)\n",
+       "Dimensions:                  (step: 2, longitude: 36,\n",
+       "                              forecast_reference_time: 4, latitude: 19, level: 2)\n",
        "Coordinates:\n",
-       "  * levelist                 (levelist) int64 16B 500 700\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
        "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
        "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
        "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * level                    (level) int64 16B 500 700\n",
        "Data variables:\n",
-       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
-       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
        "Attributes:\n",
        "    class:        od\n",
        "    stream:       oper\n",
@@ -731,34 +731,34 @@
        "    domain:       g\n",
        "    number:       0\n",
        "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
  • level
    PandasIndex
    PandasIndex(Index([500, 700], dtype='int64', name='level'))
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (levelist: 2, forecast_reference_time: 4, step: 2,\n", - " latitude: 19, longitude: 36)\n", + "Dimensions: (step: 2, longitude: 36,\n", + " forecast_reference_time: 4, latitude: 19, level: 2)\n", "Coordinates:\n", - " * levelist (levelist) int64 16B 500 700\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * level (level) int64 16B 500 700\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", diff --git a/src/earthkit/data/testing.py b/src/earthkit/data/testing.py index e9b08d28b..5f463cb61 100644 --- a/src/earthkit/data/testing.py +++ b/src/earthkit/data/testing.py @@ -7,6 +7,7 @@ # nor does it submit to any jurisdiction. # +import importlib.util import logging import os import pathlib @@ -133,6 +134,12 @@ def modules_installed(*modules): NO_ECFS = True +if importlib.util.find_spec("zarr") is not None: + NO_ZARR = False +else: + NO_ZARR = True + + def MISSING(*modules): return not modules_installed(*modules) diff --git a/tests/sources/test_zarr.py b/tests/sources/test_zarr.py index f84a58693..109cf2d82 100644 --- a/tests/sources/test_zarr.py +++ b/tests/sources/test_zarr.py @@ -1,16 +1,21 @@ -import importlib.util +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. 
+# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# import pytest from earthkit.data import from_source from earthkit.data.readers.netcdf.field import XArrayField +from earthkit.data.testing import NO_ZARR from earthkit.data.testing import earthkit_test_data_file -if importlib.util.find_spec("zarr") is not None: - NO_ZARR = False -else: - NO_ZARR = True - @pytest.mark.skipif(NO_ZARR, reason="Zarr not installed") def test_zarr_source(): diff --git a/tests/targets/test_target_zarr.py b/tests/targets/test_target_zarr.py new file mode 100644 index 000000000..fed4cb6b2 --- /dev/null +++ b/tests/targets/test_target_zarr.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+#
+
+
+import os
+
+import pytest
+
+from earthkit.data import from_source
+from earthkit.data.core.temporary import temp_directory
+from earthkit.data.targets import to_target
+from earthkit.data.testing import NO_ZARR
+
+
+@pytest.mark.skipif(NO_ZARR, reason="Zarr not installed")
+@pytest.mark.cache
+@pytest.mark.parametrize("direct_call", [True, False])
+def test_target_zarr_from_grib(direct_call):
+    ds = from_source("sample", "pl.grib")
+
+    with temp_directory() as tmp:
+        path = os.path.join(tmp, "_res.zarr")
+
+        if direct_call:
+            to_target(
+                "zarr",
+                earthkit_to_xarray_kwargs={"chunks": {"forecast_reference_time": 1, "step": 1, "level": 1}},
+                xarray_to_zarr_kwargs={"store": path, "mode": "w"},
+                data=ds,
+            )
+        else:
+            ds.to_target(
+                "zarr",
+                earthkit_to_xarray_kwargs={"chunks": {"forecast_reference_time": 1, "step": 1, "level": 1}},
+                xarray_to_zarr_kwargs={"store": path, "mode": "w"},
+            )
+
+        import zarr
+
+        root = zarr.group(path)
+        assert root
+
+        shapes = {
+            "t": (4, 2, 2, 19, 36),
+            "r": (4, 2, 2, 19, 36),
+            "forecast_reference_time": (4,),
+            "step": (2,),
+            "level": (2,),
+            "latitude": (19,),
+            "longitude": (36,),
+        }
+
+        for k in ["t", "r", "forecast_reference_time", "step", "level", "latitude", "longitude"]:
+            assert k in root, f"Key {k} not found in Zarr root"
+            assert (
+                root[k].shape == shapes[k]
+            ), f"Shape mismatch for {k}: expected {shapes[k]}, got {root[k].shape}"

From 9a1e9180a91f6d51eb3a6ddba719f543b4915e37 Mon Sep 17 00:00:00 2001
From: Sandor Kertesz
Date: Tue, 10 Jun 2025 16:13:39 +0100
Subject: [PATCH 6/6] Add tests

---
 src/earthkit/data/testing.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/earthkit/data/testing.py b/src/earthkit/data/testing.py
index 5f463cb61..115a87ced 100644
--- a/src/earthkit/data/testing.py
+++ b/src/earthkit/data/testing.py
@@ -7,7 +7,6 @@
 # nor does it submit to any jurisdiction.
# -import importlib.util import logging import os import pathlib @@ -134,10 +133,14 @@ def modules_installed(*modules): NO_ECFS = True -if importlib.util.find_spec("zarr") is not None: - NO_ZARR = False -else: - NO_ZARR = True +NO_ZARR = True +try: + import zarr # noqa + + if int(zarr.__version__.split(".")[0]) >= 3: + NO_ZARR = False +except Exception: + pass def MISSING(*modules):