Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ optional-dependencies.test = [
"pytest-timeout",
]
optional-dependencies.wekeo = [ "hda>=2.22" ]
optional-dependencies.zarr = [ "zarr>=3" ]
urls.Documentation = "https://earthkit-data.readthedocs.io/"
urls.Homepage = "https://github.com/ecmwf/earthkit-data/"
urls.Issues = "https://github.com/ecmwf/earthkit-data.issues"
Expand Down
27 changes: 13 additions & 14 deletions src/earthkit/data/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,27 +204,26 @@ def reader(source, path, **kwargs):

raise TypeError("Provided reader must be a callable or a string, not %s" % type(reader))

if os.path.isdir(path):
from .directory import DirectoryReader

return DirectoryReader(source, path).mutate()
LOG.debug("Reader for %s", path)

if not os.path.exists(path):
r = _non_existing(source, path, **kwargs)
if r is not None:
return r
raise FileNotFoundError(f"No such file exists: '{path}'")

if os.path.getsize(path) == 0:
r = _empty(source, path, **kwargs)
if r is not None:
return r
raise Exception(f"File is empty: '{path}'")
LOG.debug("Reader for %s", path)

n_bytes = CONFIG.get("reader-type-check-bytes")
with open(path, "rb") as f:
magic = f.read(n_bytes)
if os.path.isdir(path):
magic = None
else:
if os.path.getsize(path) == 0:
r = _empty(source, path, **kwargs)
if r is not None:
return r
raise Exception(f"File is empty: '{path}'")

n_bytes = CONFIG.get("reader-type-check-bytes")
with open(path, "rb") as f:
magic = f.read(n_bytes)

LOG.debug("Looking for a reader for %s (%s)", path, magic)

Expand Down
20 changes: 17 additions & 3 deletions src/earthkit/data/readers/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ def mutate(self):
return self

def mutate_source(self):
if os.path.exists(os.path.join(self.path, ".zattrs")):
if (
os.path.exists(os.path.join(self.path, ".zarray"))
or os.path.exists(os.path.join(self.path, ".zgroup"))
or os.path.exists(os.path.join(self.path, ".zmetadata"))
or os.path.exists(os.path.join(self.path, ".zattrs"))
):
if self.stream:
raise ValueError("Cannot stream zarr directories")
return from_source("zarr", self.path)
return from_source("xarray-zarr", self.path)

if len(self._content) == 1:
return from_source(
Expand Down Expand Up @@ -103,5 +108,14 @@ def write(self, f, **kwargs):


def reader(source, path, *, magic=None, deeper_check=False, **kwargs):
if magic is None and os.path.isdir(path):
if (
magic is None
and os.path.isdir(path)
and not (
os.path.exists(os.path.join(path, ".zarray"))
or os.path.exists(os.path.join(path, ".zgroup"))
or os.path.exists(os.path.join(path, ".zmetadata"))
or os.path.exists(os.path.join(path, ".zattrs"))
)
):
return DirectoryReader(source, path)
14 changes: 6 additions & 8 deletions src/earthkit/data/readers/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,10 @@ def to_numpy(self, numpy_load_kwargs={}):


def reader(source, path, *, magic=None, deeper_check=False, **kwargs):
if magic is None: # Bypass check and force
return NumpyReader(source, path)
if magic is not None:
if magic[:6] == b"\x93NUMPY":
return NumpyReader(source, path)

if magic[:6] == b"\x93NUMPY":
return NumpyReader(source, path)

_, extension = os.path.splitext(path)
if magic[:4] == b"PK\x03\x04" and extension == ".npz":
return NumpyZipReader(source, path)
_, extension = os.path.splitext(path)
if magic[:4] == b"PK\x03\x04" and extension == ".npz":
return NumpyZipReader(source, path)
2 changes: 1 addition & 1 deletion src/earthkit/data/readers/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ def reader(source, path, *, magic=None, deeper_check=False, **kwargs):

kind, compression = mimetypes.guess_type(path)

if magic is None or kind == "application/x-tar":
if kind == "application/x-tar":
return TarReader(source, path, compression)
3 changes: 0 additions & 3 deletions src/earthkit/data/readers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ def mutate(self):


def reader(source, path, *, magic=None, deeper_check=False, **kwargs):
if magic is None: # Bypass check and force
return TextReader(source, path)

if deeper_check:
if is_text(path):
return TextReader(source, path)
45 changes: 45 additions & 0 deletions src/earthkit/data/readers/zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# (C) Copyright 2020 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import os

from . import Reader
from .netcdf.fieldlist import XArrayFieldList


class ZarrReader(XArrayFieldList, Reader):

def __init__(self, source, path, **kwargs):
Reader.__init__(self, source, path, **kwargs)
XArrayFieldList.__init__(self, self._open_zarr(**kwargs))

def mutate_source(self):
return self

def to_xarray(self, **kwargs):
return self._open_zarr(**kwargs)

def _open_zarr(self, **kwargs):
import xarray as xr

options = kwargs.get("xarray_open_zarr_kwargs", kwargs)
return xr.open_zarr(self.path, **options)

def __repr__(self):
return f"ZarrReader({self.path})"


def reader(source, path, *, magic=None, deeper_check=False, **kwargs):
if (
os.path.exists(os.path.join(path, ".zarray"))
or os.path.exists(os.path.join(path, ".zgroup"))
or os.path.exists(os.path.join(path, ".zmetadata"))
or os.path.exists(os.path.join(path, ".zattrs"))
):
return ZarrReader(source, path)
29 changes: 29 additions & 0 deletions src/earthkit/data/sources/xarray_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# (C) Copyright 2020 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from ..readers.zarr import ZarrReader
from . import Source


class ZarrSource(Source):

def __init__(self, path, **kwargs):
super().__init__(**kwargs)
self._reader = ZarrReader(self, path, **kwargs)

def mutate(self):
source = self._reader.mutate_source()
if source not in (None, self):
source._parent = self
return source

return self


source = ZarrSource
4 changes: 4 additions & 0 deletions tests/data/test_zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"Conventions": "CF-1.6",
"history": "2024-04-24 15:40:12 GMT by grib_to_netcdf-2.36.0: /Users/cgr/install/eccodes/release/bin/grib_to_netcdf -o test4.nc test4.grib"
}
3 changes: 3 additions & 0 deletions tests/data/test_zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
Loading