Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
822 changes: 822 additions & 0 deletions docs/examples/grib_to_zarr_target.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/examples/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ Targets and encoders
grib_to_file_pattern_target.ipynb
grib_to_fdb_target.ipynb
grib_to_geotiff.ipynb
grib_to_zarr_target.ipynb
grib_encoder.ipynb

Miscellaneous
Expand Down
1 change: 1 addition & 0 deletions docs/guide/targets/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Examples
- :ref:`/examples/grib_to_file_pattern_target.ipynb`
- :ref:`/examples/grib_to_fdb_target.ipynb`
- :ref:`/examples/grib_to_geotiff.ipynb`
- :ref:`/examples/grib_to_zarr_target.ipynb`


Overview
Expand Down
23 changes: 23 additions & 0 deletions docs/guide/targets/to_target.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ Built in targets
* - :ref:`targets-fdb`
- add data to a `Fields DataBase <https://fields-database.readthedocs.io/en/latest/>`_ (FDB)
- :py:class:`~data.targets.FDBTarget`
* - :ref:`targets-zarr`
- add data to a `zarr <https://zarr.dev>`_ store
- :py:class:`~data.targets.ZarrTarget`



.. _targets-file:
Expand Down Expand Up @@ -170,6 +174,25 @@ fdb
- :ref:`/examples/grib_to_fdb_target.ipynb`


.. _targets-zarr:

zarr
----

.. py:function:: to_target("zarr", earthkit_to_xarray_kwargs=None, xarray_to_zarr_kwargs=None, data=None)
:noindex:

The ``zarr`` target writes to a `zarr <https://zarr.dev>`_ store.

:param dict earthkit_to_xarray_kwargs: the keyword arguments passed to the :func:`to_xarray` function. If not provided, the default values are used.
:param dict xarray_to_zarr_kwargs: the keyword arguments passed to the :py:func:`xarray.Dataset.to_zarr` function. As a bare minimum, the ``store`` keyword argument must be provided.
:param data: specify the data to write. Cannot be set when :func:`to_target` is called on a data object.

This target converts the data to an :ref:`xarray.Dataset <xarray-dataset>` and then writes it to a zarr store using the :py:func:`xarray.Dataset.to_zarr` function. The conversion to an xarray dataset is done by the :func:`to_xarray` function.

Notebook examples:

- :ref:`/examples/grib_to_zarr_target.ipynb`


.. .. _data-targets-multio:
Expand Down
80 changes: 80 additions & 0 deletions src/earthkit/data/encoders/zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# (C) Copyright 2023 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import logging

from . import EncodedData
from . import Encoder

LOG = logging.getLogger(__name__)


class ZarrEncodedData(EncodedData):
def __init__(self, ds):
self.ds = ds

def to_bytes(self):
return None

def to_file(self, f):
return None

def to_xarray(self):
return self.ds

def metadata(self, key):
raise NotImplementedError


class ZarrEncoder(Encoder):
def __init__(self, **kwargs):
super().__init__(**kwargs)

def encode(
self,
data=None,
**kwargs,
):
if data is not None:
from earthkit.data.wrappers import get_wrapper

data = get_wrapper(data)
return data._encode(self, **kwargs)
else:
raise ValueError("No data to encode")

def _encode(
self,
data=None,
values=None,
min=None,
max=None,
check_nans=False,
metadata={},
template=None,
# return_bytes=False,
missing_value=9999,
**kwargs,
):
return ZarrEncodedData(data.to_xarray(add_earthkit_attrs=False))

def _encode_field(self, field, **kwargs):
raise NotImplementedError("ZarrEncoder does not support encoding individual fields.")

def _encode_fieldlist(self, data, **kwargs):
earthkit_to_xarray_kwargs = kwargs.pop("earthkit_to_xarray_kwargs", {})
# earthkit_to_xarray_kwargs.update(kwargs)
earthkit_to_xarray_kwargs["add_earthkit_attrs"] = False
kwargs = earthkit_to_xarray_kwargs

ds = data.to_xarray(**kwargs)
return ZarrEncodedData(ds)


encoder = ZarrEncoder
51 changes: 51 additions & 0 deletions src/earthkit/data/targets/zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# (C) Copyright 2020 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import logging

from . import SimpleTarget

LOG = logging.getLogger(__name__)


class ZarrTarget(SimpleTarget):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._zarr_kwargs = kwargs
self._ekd_kwargs = kwargs.pop("earthkit_to_xarray_kwargs", {})
self._xr_kwargs = kwargs.pop("xarray_to_zarr_kwargs", {})
self._encoder = "zarr"

def close(self):
"""Close the target and flush the fdb.

The target will not be able to write anymore.

Raises:
-------
ValueError: If the target is already closed.
"""
pass

def flush(self):
"""Flush the fdb.

Raises:
-------
ValueError: If the target is already closed.
"""
pass

def _write(self, data, **kwargs):
r = self._encode(data, earthkit_to_xarray_kwargs=self._ekd_kwargs)
ds = r.to_xarray()
ds.to_zarr(**self._xr_kwargs)


target = ZarrTarget
10 changes: 10 additions & 0 deletions src/earthkit/data/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,16 @@ def modules_installed(*modules):
NO_ECFS = True


NO_ZARR = True
try:
import zarr # noqa

if int(zarr.__version__.split(".")[0]) >= 3:
NO_ZARR = False
except Exception:
pass


def MISSING(*modules):
return not modules_installed(*modules)

Expand Down
17 changes: 11 additions & 6 deletions tests/sources/test_zarr.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import importlib.util
#!/usr/bin/env python3

# (C) Copyright 2020 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import pytest

from earthkit.data import from_source
from earthkit.data.readers.netcdf.field import XArrayField
from earthkit.data.testing import NO_ZARR
from earthkit.data.testing import earthkit_test_data_file

if importlib.util.find_spec("zarr") is not None:
NO_ZARR = False
else:
NO_ZARR = True


@pytest.mark.skipif(NO_ZARR, reason="Zarr not installed")
def test_zarr_source():
Expand Down
65 changes: 65 additions & 0 deletions tests/targets/test_target_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env python3

# (C) Copyright 2020 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#


import os

import pytest

from earthkit.data import from_source
from earthkit.data.core.temporary import temp_directory
from earthkit.data.targets import to_target
from earthkit.data.testing import NO_ZARR


@pytest.mark.skipif(NO_ZARR, reason="Zarr not installed")
@pytest.mark.cache
@pytest.mark.parametrize("direct_call", [True, False])
def test_target_zarr_from_grib(direct_call):
ds = from_source("sample", "pl.grib")

with temp_directory() as tmp:
path = os.path.join(tmp, "_res.zarr")

if direct_call:
to_target(
"zarr",
earthkit_to_xarray_kwargs={"chunks": {"forecast_reference_time": 1, "step": 1, "level": 1}},
xarray_to_zarr_kwargs={"store": path, "mode": "w"},
data=ds,
)
else:
ds.to_target(
"zarr",
earthkit_to_xarray_kwargs={"chunks": {"forecast_reference_time": 1, "step": 1, "level": 1}},
xarray_to_zarr_kwargs={"store": path, "mode": "w"},
)

import zarr

root = zarr.group(path)
assert root

shapes = {
"t": (4, 2, 2, 19, 36),
"r": (4, 2, 2, 19, 36),
"forecast_reference_time": (4,),
"step": (2,),
"level": (2,),
"latitude": (19,),
"longitude": (36,),
}

for k in ["t", "r", "forecast_reference_time", "step", "level", "latitude", "longitude"]:
k in root, f"Key {k} not found in Zarr root"
assert (
root[k].shape == shapes[k]
), f"Shape mismatch for {k}: expected {shapes[k]}, got {root[k].shape}"
Loading