From bd1bff763909ca756d0709b5255fe8f5cb8cc92c Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 6 Jun 2025 13:47:18 +0100 Subject: [PATCH 1/9] Xarray step range --- src/earthkit/data/core/select.py | 2 +- src/earthkit/data/indexing/tensor.py | 77 +++- src/earthkit/data/readers/grib/metadata.py | 5 +- src/earthkit/data/utils/xarray/builder.py | 13 +- src/earthkit/data/utils/xarray/coord.py | 5 +- src/earthkit/data/utils/xarray/defaults.yaml | 4 +- src/earthkit/data/utils/xarray/diff.py | 3 + src/earthkit/data/utils/xarray/dim.py | 213 +++++++--- src/earthkit/data/utils/xarray/engine.py | 2 + src/earthkit/data/utils/xarray/grib.yaml | 4 +- src/earthkit/data/utils/xarray/mars.yaml | 4 +- src/earthkit/data/utils/xarray/profile.py | 11 +- tests/xr_engine/test_xr_attrs.py | 26 +- tests/xr_engine/test_xr_dims.py | 219 +++++++--- tests/xr_engine/test_xr_engine.py | 411 +++++++++++++++++-- tests/xr_engine/test_xr_level.py | 127 +++++- tests/xr_engine/test_xr_time.py | 205 ++++++++- tests/xr_engine/xr_engine_fixtures.py | 4 +- 18 files changed, 1140 insertions(+), 195 deletions(-) diff --git a/src/earthkit/data/core/select.py b/src/earthkit/data/core/select.py index 2e6a1fdf2..104cc9c31 100644 --- a/src/earthkit/data/core/select.py +++ b/src/earthkit/data/core/select.py @@ -33,7 +33,7 @@ def normalize_selection(*args, **kwargs): or v is ALL or callable(v) or isinstance(v, (list, tuple, set, slice)) - or isinstance(v, (str, int, float, datetime.datetime)) + or isinstance(v, (str, int, float, datetime.datetime, datetime.timedelta)) ), f"Unsupported type: {type(v)} for key {k}" return _kwargs diff --git a/src/earthkit/data/indexing/tensor.py b/src/earthkit/data/indexing/tensor.py index 533f9d4f6..568c7ddf1 100644 --- a/src/earthkit/data/indexing/tensor.py +++ b/src/earthkit/data/indexing/tensor.py @@ -443,7 +443,8 @@ def _subset(self, indexes): ds = self.source[tuple(dataset_indexes)] return self.from_tensor(self, ds, coords) - def make_valid_datetime(self, 
dtype="datetime64[ns]"): + def make_valid_datetime(self, dims, dtype="datetime64[ns]"): + # TODO: make it more general dims_opt = [ ["base_datetime", "step"], @@ -457,12 +458,86 @@ def make_valid_datetime(self, dtype="datetime64[ns]"): ["step"], ] + # in the tensor the dims.coords are GRIB keys for k in ["valid_datetime", "valid_time"]: if k in self.user_coords: import datetime return (k,), [datetime.datetime.fromisoformat(x) for x in self.user_coords[k]] + DIM_ROLES = { + "forecast_reference_time": ("forecast_reference_time", "base_datetime"), + "step": ("step_timedelta", "step", "ensStep", "stepRange"), + "date": ("date", "dataDate"), + "time": ("time", "dataTime"), + } + + keys = {} + for k in DIM_ROLES: + for d in dims: + if d.name == k: + keys[k] = d.key + break + if k not in keys: + for d in self.user_dims: + if d in DIM_ROLES[k]: + keys[k] = d + break + + dims_opt = [ + ["forecast_reference_time", "step"], + ["forecast_reference_time"], + ["date", "time", "step"], + ["date", "time"], + ["date", "step"], + ["time", "step"], + ["step"], + ] + + print(f"{keys=}") + for dims in dims_opt: + if all(d in keys for d in dims): + # use same dim order as in user_dims + dims = [d for d in dims if d in self.user_dims] + other_dims = [d for d in self.user_dims if d not in dims] + # print(f"{dims=} {other_dims=}") + if other_dims: + import datetime + + import numpy as np + + other_coords = { + k: next(iter(self.user_coords[k])) for k in other_dims if k in self.user_coords + } + + vals = np.array( + [ + datetime.datetime.fromisoformat(x) + for x in self.source.sel(**other_coords).metadata("valid_datetime") + ], + dtype=dtype, + ) + + shape = tuple([self.user_dims[d] for d in dims]) + return tuple(dims), vals.reshape(shape) + else: + import datetime + + import numpy as np + + vals = np.array( + [datetime.datetime.fromisoformat(x) for x in self.source.metadata("valid_datetime")], + dtype=dtype, + ) + + shape = tuple([self.user_dims[d] for d in dims]) + return tuple(dims), 
vals.reshape(shape) + return None, None + + # print(f"{keys=}") + + # print(f"{keys=}") + # print(f"{self.user_dims=}") for dims in dims_opt: if all(d in self.user_dims for d in dims): diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index 9de63e1cf..5f3bc2e51 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -565,7 +565,10 @@ def indexing_datetime(self): return self._datetime("indexingDate", "indexingTime") def step_timedelta(self): - return to_timedelta(self.get("step", None)) + v = self.get("endStep", None) + if v is None: + v = self.get("step", None) + return to_timedelta(v) def _datetime(self, date_key, time_key): date = self.get(date_key, None) diff --git a/src/earthkit/data/utils/xarray/builder.py b/src/earthkit/data/utils/xarray/builder.py index f752ab3e3..bd4def9b9 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -297,7 +297,7 @@ def collect_date_coords(self, tensor): ): from .coord import Coord - _dims, _vals = tensor.make_valid_datetime() + _dims, _vals = tensor.make_valid_datetime(self.dims) if _dims is not None and _vals is not None: self.tensor_coords["valid_time"] = Coord.make("valid_time", _vals, dims=_dims) @@ -318,8 +318,13 @@ def build(self): # build dataset dataset = xarray.Dataset(xr_vars, coords=xr_coords, attrs=xr_attrs) - if self.profile.rename_dims_map(): - dataset = dataset.rename(self.profile.rename_dims_map()) + dataset = self.profile.rename_dataset_dims(dataset) + + # dim_map = self.profile.rename_dims_map() + # if dim_map: + # d = {k: v for k, v in dim_map.items() if k in dataset.dims} + # if d: + # dataset = dataset.rename(d) if "source" not in dataset.encoding: dataset.encoding["source"] = None @@ -544,7 +549,7 @@ def parse(self, ds, profile=None, full=False): # LOG.debug(f"{remapping=}") # LOG.debug(f"{profile.remapping=}") - # LOG.debug(f"{profile.index_keys=}") + 
LOG.debug(f"{profile.index_keys=}") # create a new fieldlist for optimised access to unique values ds_xr = XArrayInputFieldList( diff --git a/src/earthkit/data/utils/xarray/coord.py b/src/earthkit/data/utils/xarray/coord.py index 1268e0118..32950fdcd 100644 --- a/src/earthkit/data/utils/xarray/coord.py +++ b/src/earthkit/data/utils/xarray/coord.py @@ -168,7 +168,10 @@ def attrs(self, name, profile): class MonthCoord(Coord): - pass + def attrs(self, name, profile): + attrs = super().attrs(name, profile) + attrs["units"] = "months" + return attrs class LevelCoord(Coord): diff --git a/src/earthkit/data/utils/xarray/defaults.yaml b/src/earthkit/data/utils/xarray/defaults.yaml index c4c4d1ccb..d2316340c 100644 --- a/src/earthkit/data/utils/xarray/defaults.yaml +++ b/src/earthkit/data/utils/xarray/defaults.yaml @@ -43,7 +43,7 @@ strict: false errors: raise dim_roles: - ens: number + number: number date: date time: time step: step @@ -52,6 +52,8 @@ dim_roles: level: level level_type: typeOfLevel +keep_dim_role_names: true + coord_attrs: latitude: units: degrees_north diff --git a/src/earthkit/data/utils/xarray/diff.py b/src/earthkit/data/utils/xarray/diff.py index 1493c7689..8d4bb2a4f 100644 --- a/src/earthkit/data/utils/xarray/diff.py +++ b/src/earthkit/data/utils/xarray/diff.py @@ -7,6 +7,7 @@ # nor does it submit to any jurisdiction. 
# +import datetime import logging import math @@ -69,6 +70,8 @@ def _compare(v1, v2): return math.isclose(v1, v2, rel_tol=1e-9), ListDiff.VALUE_DIFF elif isinstance(v1, str) and isinstance(v2, str): return v1 == v2, ListDiff.VALUE_DIFF + elif isinstance(v1, datetime.timedelta) and isinstance(v2, datetime.timedelta): + return v1 == v2, ListDiff.VALUE_DIFF elif type(v1) is not type(v2): return False, ListDiff.TYPE_DIFF else: diff --git a/src/earthkit/data/utils/xarray/dim.py b/src/earthkit/data/utils/xarray/dim.py index b79aab9d3..1ff48a778 100644 --- a/src/earthkit/data/utils/xarray/dim.py +++ b/src/earthkit/data/utils/xarray/dim.py @@ -42,7 +42,7 @@ class ParamLevelKey(CompoundKey): LEVEL_TYPE_KEYS = ["typeOfLevel", "levtype"] DATE_KEYS = ["date", "andate", "validityDate", "dataDate", "hdate", "referenceDate", "indexingDate"] TIME_KEYS = ["time", "antime", "validityTime", "dataTime", "referenceTime", "indexingTime"] -STEP_KEYS = ["step", "endStep", "stepRange", "forecastMonth", "fcmonth"] +STEP_KEYS = ["step_timedelta", "step", "endStep", "stepRange", "forecastMonth", "fcmonth"] MONTH_KEYS = ["forecastMonth", "fcmonth"] VALID_DATETIME_KEYS = ["valid_time", "valid_datetime"] BASE_DATETIME_KEYS = [ @@ -89,15 +89,18 @@ def find_alias(key, drop=None): return r -def make_dim(owner, name, *args, **kwargs): - if name in PREDEFINED_DIMS: - return PREDEFINED_DIMS[name](owner, *args, key=name, **kwargs) +def make_dim(owner, *args, name=None, key=None, **kwargs): + predef_key = key or name + + if predef_key in PREDEFINED_DIMS: + return PREDEFINED_DIMS[predef_key](owner, *args, name=name, key=key, **kwargs) ck = CompoundKey.make(name) if ck is not None: d = CompoundKeyDim(owner, ck) else: - d = OtherDim(owner, name, *args, **kwargs) + print("args", args, "kwargs", kwargs, "name", name, "key", key) + d = OtherDim(owner, *args, name=name, key=key, **kwargs) return d @@ -123,6 +126,7 @@ class Dim: name = None key = None + label = None alias = None drop = None enforce_unique = 
False @@ -233,38 +237,43 @@ class NumberDim(Dim): class DateDim(Dim): name = "date" - drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop="date") class TimeDim(Dim): name = "time" - drop = get_keys(TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(TIME_KEYS + DATETIME_KEYS, drop="time") + + +# class StepDim(Dim): +# name = "step" +# drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop="step") class StepDim(Dim): name = "step" - drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop=name) + drop = get_keys(STEP_KEYS + VALID_DATETIME_KEYS, drop=["step_timedelta"]) class ValidTimeDim(Dim): name = "valid_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="valid_time") class ForecastRefTimeDim(Dim): name = "forecast_reference_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="forecast_reference_time") alias = ["base_datetime"] class IndexingTimeDim(Dim): name = "indexing_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="indexing_time") class ReferenceTimeDim(Dim): name = "reference_time" - drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop=name) + drop = get_keys(DATE_KEYS + TIME_KEYS + DATETIME_KEYS, drop="reference_time") class CustomForecastRefDim(Dim): @@ -378,13 +387,41 @@ class OtherDim(Dim): pass +class DimRole: + NAMES = ("number", "date", "time", "step", "level", "level_type", "forecast_reference_time", "valid_time") + + def __init__(self, d, name_as_key=True): + self.d = d + self.name_as_key = name_as_key + + if "ens" in d: + import warnings + + warnings.warn("'ens' key in dim_roles is deprecated. 
Use 'number' instead", DeprecationWarning) + self.d["number"] = self.d.pop("ens") + + for k in d: + if k not in self.NAMES: + raise ValueError(f"Invalid dim role name={k}. Must be one of {self.NAMES}") + + def role(self, name, default=None, raise_error=True): + if name in self.d: + return self.d[name], name if self.name_as_key else self.d[name] + if default is not None: + return default + if raise_error: + raise ValueError(f"Dim role {name} not found in {self.d}") + else: + return default, default + + class DimMode: - default = [] + default = {} # maps key to name def build(self, profile, owner, active=True, dims=None): if not dims: dims = self.default - return {name: make_dim(owner, name, active=active) for name in dims} + return {name: make_dim(owner, name=name, key=key, active=active) for name, key in dims.items()} class ForecastTimeDimMode(DimMode): @@ -393,25 +430,26 @@ class ForecastTimeDimMode(DimMode): TIMES = ["time", "dataTime"] def build(self, profile, owner, active=True): - ref_time = owner.dim_roles.get("forecast_reference_time", None) - if ref_time == "forecast_reference_time": - ref_time_dim = ForecastRefTimeDim(owner, active=active) - elif ref_time: - ref_time_dim = make_dim(owner, ref_time, active=active) + ref_time_key, ref_time_name = owner.dim_roles.role("forecast_reference_time", raise_error=False) + + if ref_time_key == "forecast_reference_time": + ref_time_dim = ForecastRefTimeDim(owner, name=ref_time_name, active=active) + elif ref_time_key: + ref_time_dim = make_dim(owner, name=ref_time_name, key=ref_time_key, active=active) else: - date = owner.dim_roles["date"] - time = owner.dim_roles["time"] + date, _ = owner.dim_roles.role("date") + time, _ = owner.dim_roles.role("time") built_in = date in self.DATES and time in self.TIMES if built_in: - ref_time_dim = ForecastRefTimeDim(owner, active=active) + ref_time_dim = ForecastRefTimeDim(owner, name=ref_time_name, active=active) else: - ref_time_dim = CustomForecastRefDim(owner, [date, time], 
active=active) + ref_time_dim = CustomForecastRefDim(owner, [date, time], name=ref_time_name, active=active) - step = owner.dim_roles["step"] - step_dim = make_dim(owner, step, active=active) + step_key, step_name = owner.dim_roles.role("step") + step_dim = make_dim(owner, name=step_name, key=step_key, active=active) - self.register_ref_time_key(ref_time_dim.name) - self.register_step_key(step_dim.name) + self.register_ref_time_key(ref_time_dim.key) + self.register_step_key(step_dim.key) return {d.name: d for d in [ref_time_dim, step_dim]} @@ -429,30 +467,33 @@ def register_step_key(self, name): class ValidTimeDimMode(DimMode): name = "valid_time" - default = ["valid_time"] + default = {"valid_time": "valid_time"} class RawTimeDimMode(DimMode): name = "raw" - default = ["date", "time", "step"] def build(self, profile, owner, active=True): - date = owner.dim_roles["date"] - time = owner.dim_roles["time"] - step = owner.dim_roles["step"] - return super().build(profile, owner, active=active, dims=[date, time, step]) + dims = {} + for k in ["date", "time", "step"]: + key, name = owner.dim_roles.role(k) + dims[name] = key + return super().build(profile, owner, active=active, dims=dims) class LevelDimMode(DimMode): name = "level" def build(self, profile, owner, **kwargs): - level_key = owner.dim_roles["level"] - level_type_key = owner.dim_roles["level_type"] - return { - level_key: LevelDim(owner, key=level_key, **kwargs), - level_type_key: LevelTypeDim(owner, key=level_type_key, **kwargs), - } + # level + key, name = owner.dim_roles.role("level") + level_dim = LevelDim(owner, name=name, key=key, **kwargs) + + # level_type + key, name = owner.dim_roles.role("level_type") + level_type_dim = LevelTypeDim(owner, name=name, key=key, **kwargs) + + return {level_dim.key: level_dim, level_type_dim.key: level_type_dim} class LevelAndTypeDimMode(DimMode): @@ -460,8 +501,9 @@ class LevelAndTypeDimMode(DimMode): dim = LevelAndTypeDim def build(self, profile, owner, **kwargs): - 
level_key = owner.dim_roles["level"] - level_type_key = owner.dim_roles["level_type"] + + level_key, _ = owner.dim_roles.role("level") + level_type_key, _ = owner.dim_roles.role("level_type") return {self.name: self.dim(owner, level_key, level_type_key, **kwargs)} @@ -486,8 +528,8 @@ class NumberDimBuilder(DimBuilder): name = "number" def __init__(self, profile, owner): - ens_key = owner.dim_roles["ens"] - self.used = {self.name: NumberDim(owner, key=ens_key)} + key, name = owner.dim_roles.role("number") + self.used = {self.name: NumberDim(owner, name=name, key=key)} class TimeDimBuilder(DimBuilder): @@ -523,7 +565,23 @@ def __init__(self, profile, owner): DIM_BUILDERS = {v.name: v for v in [NumberDimBuilder, TimeDimBuilder, LevelDimBuilder]} -class Dims: +def ensure_dim_map(d): + if isinstance(d, dict): + return d + d = ensure_iterable(d) + r = {} + for k in d: + if isinstance(k, str): + r[k] = k + elif isinstance(k, tuple) and len(k) == 2: + r[k[0]] = k[1] + elif isinstance(k, dict): + for kk, vv in k.items(): + r[kk] = vv + return r + + +class DimHandler: def __init__( self, profile, @@ -534,6 +592,7 @@ def __init__( split_dims, rename_dims, dim_roles, + keep_dim_role_names, dims_as_attrs, time_dim_mode, level_dim_mode, @@ -542,11 +601,12 @@ def __init__( self.profile = profile - self.dim_roles = dim_roles - self.extra_dims = ensure_iterable(extra_dims) + self.dim_roles = DimRole(dim_roles, name_as_key=keep_dim_role_names) + # self.keep_dim_role_names = keep_dim_role_names + self.extra_dims = ensure_dim_map(extra_dims) self.drop_dims = ensure_iterable(drop_dims) self.ensure_dims = ensure_iterable(ensure_dims) - self.fixed_dims = ensure_iterable(fixed_dims) + self.fixed_dims = ensure_dim_map(fixed_dims) self.split_dims = ensure_iterable(split_dims) self.rename_dims_map = ensure_dict(rename_dims) self.dims_as_attrs = list(ensure_iterable(dims_as_attrs)) @@ -554,6 +614,19 @@ def __init__( self.level_dim_mode = level_dim_mode self.squeeze = squeeze + # if "ens" in 
self.dim_roles: + # Warning.deprecated("'ens' key in dim_roles is deprecated. Use 'number' instead") + # self.dim_roles["number"] = self.dim_roles.pop("ens") + + # if self.keep_dim_role_names: + # d = {v: k for k, v in self.dim_roles.items()} + # for k in list(self.rename_dims_map.keys()): + # if k in self.dim_roles: + # d[self.dim_roles[k]] = self.rename_dims_map.pop(k) + + # d.update(self.rename_dims_map) + # self.rename_dims_map = d + self.var_key_dim = None if self.fixed_dims: @@ -594,6 +667,28 @@ def __init__( self.dims = dims + print(f"self.dims={self.dims}") + + # for d in self.dims.values(): + # if d.name != d.key: + # if d.name in self.rename_dims_map: + # self.rename_dims_map[d.key] = d.name + # else d + # if d.key not in self.rename_dims_map: + # self.rename_dims_map[d.key] = d.name + # else d + + # if self.keep_dim_role_names: + # d = {v: k for k, v in self.dim_roles.items()} + # for k in list(self.rename_dims_map.keys()): + # if k in self.dim_roles: + # d[self.dim_roles[k]] = self.rename_dims_map.pop(k) + + # d.update(self.rename_dims_map) + # self.rename_dims_map = d + + self.var_key_dim = None + # ensure all the required keys are in the profile keys = [] for d in self.dims.values(): @@ -640,8 +735,9 @@ def _init_fixed_dims(self): # ) # ) - self.ensure_dims = [k for k in self.fixed_dims] - dims = {k: make_dim(self, name=k) for k in self.fixed_dims} + # self.ensure_dims = [k for k in self.fixed_dims] + self.ensure_dims = list(self.fixed_dims.keys()) + dims = {k: make_dim(self, name=k, key=v) for k, v in self.fixed_dims.items()} return dims def _init_dims(self): @@ -667,7 +763,7 @@ def _remove_duplicates(keys): var_keys = [self.profile.variable_key] # non-core dims - keys = self.extra_dims + self.ensure_dims + keys = list(self.extra_dims.keys()) + self.ensure_dims keys = _remove_duplicates(keys) remapping_dims = self._init_remapping_dims(keys) @@ -806,6 +902,23 @@ def get_dims(self, names): r.append(make_dim(self, name=name)) return r + def 
rename_dataset_dims(self, dataset): + # first rename the dimensions where the name and key are different + mapping = {} + for d in self.dims.values(): + if d.key in dataset.dims and d.name != d.key: + mapping[d.key] = d.name + if mapping: + dataset = dataset.rename(mapping) + + # then apply the user defined rename_dims_map + if self.rename_dims_map: + mapping = {k: v for k, v in self.rename_dims_map.items() if k in dataset.dims} + if mapping: + dataset = dataset.rename(mapping) + + return dataset + PREDEFINED_DIMS = {} for i, d in enumerate( diff --git a/src/earthkit/data/utils/xarray/engine.py b/src/earthkit/data/utils/xarray/engine.py index dd4f10d24..f1288b190 100644 --- a/src/earthkit/data/utils/xarray/engine.py +++ b/src/earthkit/data/utils/xarray/engine.py @@ -29,6 +29,7 @@ def open_dataset( ensure_dims=None, fixed_dims=None, dim_roles=None, + keep_dim_role_names=None, rename_dims=None, dims_as_attrs=None, time_dim_mode=None, @@ -266,6 +267,7 @@ def open_dataset( fixed_dims=fixed_dims, rename_dims=rename_dims, dim_roles=dim_roles, + keep_dim_role_names=keep_dim_role_names, dims_as_attrs=dims_as_attrs, time_dim_mode=time_dim_mode, level_dim_mode=level_dim_mode, diff --git a/src/earthkit/data/utils/xarray/grib.yaml b/src/earthkit/data/utils/xarray/grib.yaml index 9384a00ac..05d544e9e 100644 --- a/src/earthkit/data/utils/xarray/grib.yaml +++ b/src/earthkit/data/utils/xarray/grib.yaml @@ -1,8 +1,8 @@ dim_roles: - ens: number + number: number date: dataDate time: dataTime - step: step + step: step_timedelta level: level level_type: typeOfLevel diff --git a/src/earthkit/data/utils/xarray/mars.yaml b/src/earthkit/data/utils/xarray/mars.yaml index 1d17dcdbf..5c07bfed6 100644 --- a/src/earthkit/data/utils/xarray/mars.yaml +++ b/src/earthkit/data/utils/xarray/mars.yaml @@ -1,8 +1,8 @@ dim_roles: - ens: number + number: number date: date time: time - step: step + step: step_timedelta level: levelist level_type: levtype diff --git 
a/src/earthkit/data/utils/xarray/profile.py b/src/earthkit/data/utils/xarray/profile.py index 7e1bbd843..d160fad51 100644 --- a/src/earthkit/data/utils/xarray/profile.py +++ b/src/earthkit/data/utils/xarray/profile.py @@ -96,7 +96,7 @@ def __init__( **kwargs, ): from .attrs import Attrs - from .dim import Dims + from .dim import DimHandler self._kwargs = dict(**kwargs) self.name = name @@ -116,7 +116,7 @@ def __init__( self.rename_variables_map = kwargs.pop("rename_variables") # dims - self.dims = Dims( + self.dims = DimHandler( self, kwargs.pop("extra_dims"), kwargs.pop("drop_dims"), @@ -125,6 +125,7 @@ def __init__( kwargs.pop("split_dims"), kwargs.pop("rename_dims"), kwargs.pop("dim_roles"), + kwargs.pop("keep_dim_role_names"), kwargs.pop("dims_as_attrs"), kwargs.pop("time_dim_mode"), kwargs.pop("level_dim_mode"), @@ -331,6 +332,9 @@ def update(self, ds): assert self.variables assert self.variable_key not in self.dim_keys + print("UPDATE dims", self.dims.dims) + print("UPDATE dim_keys", self.dim_keys) + # print("UPDATE variable_key", self.variable_key) # print("UPDATE variables", self.variables) # print(" -> dim_keys", self.dim_keys) @@ -346,3 +350,6 @@ def rename_dims_map(self): def rename_variable(self, v): return self.rename_variables_map.get(v, v) + + def rename_dataset_dims(self, dataset): + return self.dims.rename_dataset_dims(dataset) diff --git a/tests/xr_engine/test_xr_attrs.py b/tests/xr_engine/test_xr_attrs.py index 92f1add25..03a404fa4 100644 --- a/tests/xr_engine/test_xr_attrs.py +++ b/tests/xr_engine/test_xr_attrs.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. 
# +import datetime import os import sys @@ -47,14 +48,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, + "keep_dim_role_names": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {}, ), ( @@ -65,14 +67,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, + "keep_dim_role_names": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {"levtype": 2}, ), ( @@ -83,14 +86,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, + "keep_dim_role_names": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {}, ), ( @@ -101,14 +105,15 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, + "keep_dim_role_names": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, {"levtype": 2}, ), ], @@ -151,14 +156,15 @@ def test_xr_dims_as_attrs(kwargs, coords, dims, attrs): 
"decode_times": False, "decode_timedelta": False, "strict": False, + "keep_dim_role_names": False, }, { "date": [20240603, 20240604], "time": [0, 1200], - "step": [0, 6], + "step_timedelta": [datetime.timedelta(hours=0), datetime.timedelta(hours=6)], "levelist": [500, 700], }, - {"date": 2, "time": 2, "step": 2, "levelist": 2}, + {"date": 2, "time": 2, "step_timedelta": 2, "levelist": 2}, { "shortName": "t", "levtype": "pl", diff --git a/tests/xr_engine/test_xr_dims.py b/tests/xr_engine/test_xr_dims.py index d3f4e3e97..a43c4a51d 100644 --- a/tests/xr_engine/test_xr_dims.py +++ b/tests/xr_engine/test_xr_dims.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. # +import datetime import os import sys @@ -116,22 +117,25 @@ def test_xr_dims_input_fieldlist(): @pytest.mark.parametrize( "kwargs,var_key,variables,dim_keys", [ - ({}, "param", ["r", "t"], ["step", "levelist"]), + ({}, "param", ["r", "t"], ["step_timedelta", "levelist"]), ( - {"time_dim_mode": "forecast"}, + {"time_dim_mode": "forecast", "keep_dim_role_names": False}, "param", ["r", "t"], - ["step", "levelist"], + ["step_timedelta", "levelist"], ), ( - {"squeeze": False, "time_dim_mode": "raw"}, + {"squeeze": False, "time_dim_mode": "raw", "keep_dim_role_names": False}, "param", ["r", "t"], - ["time", "step", "levelist"], + ["time", "step_timedelta", "levelist"], ), ], ) def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): + """Test for the internal profile/dimension object. 
Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_LEV, prof) # prof.update(ds, _attributes(ds)) @@ -144,128 +148,121 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): @pytest.mark.parametrize( "kwargs,var_key,variables,dims", [ - # ({"time_dim_mode": "raw"}, "param", ["r", "t"], ["date", "time", "step", "levelist"]), ( - {"time_dim_mode": "forecast"}, + {"time_dim_mode": "forecast", "keep_dim_role_names": False}, "param", ["r", "t"], - ["forecast_reference_time", "step", "levelist", "levtype"], + ["forecast_reference_time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "param_level"}, + {"time_dim_mode": "raw", "variable_key": "param_level", "keep_dim_role_names": False}, "param_level", ["r1000", "r850", "t1000", "t850"], - ["date", "time", "step", "levtype"], + ["date", "time", "step_timedelta", "levtype"], ), - # ( - # {"time_dim_mode": "raw", "extra_dims": "param_level"}, - # "param_level", - # [ - # "r1000", - # "r850", - # "t1000", - # "t850", - # ], - # ["date"], - # ), ( { "time_dim_mode": "raw", "variable_key": "param_level", "remapping": {"param_level": "{param}_{level}"}, + "keep_dim_role_names": False, }, "param_level", ["r_1000", "r_850", "t_1000", "t_850"], - ["date", "time", "step", "levtype"], + ["date", "time", "step_timedelta", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "shortName"}, + {"time_dim_mode": "raw", "variable_key": "shortName", "keep_dim_role_names": False}, "shortName", ["r", "t"], - ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "shortName", "drop_variables": ["r"]}, + { + "time_dim_mode": "raw", + "variable_key": "shortName", + "drop_variables": ["r"], + "keep_dim_role_names": False, + }, "shortName", ["t"], 
- ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "param_level", "drop_variables": ["r", "r1000"]}, + { + "time_dim_mode": "raw", + "variable_key": "param_level", + "drop_variables": ["r", "r1000"], + "keep_dim_role_names": False, + }, "param_level", ["r850", "t1000", "t850"], [ "date", "time", - "step", + "step_timedelta", "levtype", ], ), - # ( - # {"use_level_per_type_dim": True}, - # "param", - # ["r", "t"], - # {"date": ["20210101", "20210102"], "level_per_type": ["850pl", "1000pl"]}, - # ), ( - {"time_dim_mode": "raw", "level_dim_mode": "level_and_type"}, + {"time_dim_mode": "raw", "level_dim_mode": "level_and_type", "keep_dim_role_names": False}, "param", ["r", "t"], { "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "level_and_type": ["1000pl", "850pl"], }, ), ( - {"time_dim_mode": "raw", "extra_dims": "class"}, + {"time_dim_mode": "raw", "extra_dims": "class", "keep_dim_role_names": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), ( - {"time_dim_mode": "raw", "ensure_dims": "class"}, + {"time_dim_mode": "raw", "ensure_dims": "class", "keep_dim_role_names": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), ( - {"time_dim_mode": "raw", "ensure_dims": ["class", "step"]}, + {"time_dim_mode": "raw", "ensure_dims": ["class", "step"], "keep_dim_role_names": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, ), 
( - {"time_dim_mode": "raw", "extra_dims": "class", "squeeze": False}, + {"time_dim_mode": "raw", "extra_dims": "class", "squeeze": False, "keep_dim_role_names": False}, "param", ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], "time": ["12"], - "step": [0], + "step_timedelta": [datetime.timedelta(hours=0)], "levelist": [850, 1000], "levtype": ["pl"], }, @@ -273,6 +270,9 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): ], ) def test_xr_dims_ds_date_lev(kwargs, var_key, variables, dims): + """Test for the internal profile/dimension object. Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_DATE_LEV, prof, remapping=prof.remapping.build()) @@ -303,13 +303,16 @@ def test_xr_dims_ds_date_lev(kwargs, var_key, variables, dims): {"time_dim_mode": "raw"}, "param", ["2t", "msl", "r", "t"], - ["date", "time", "step", "levelist", "levtype"], + ["date", "time", "step_timedelta", "levelist", "levtype"], ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist", "levtype"]), # ({"squeeze": False}, "param", ["r", "t"], ["time", "step", "levelist", "levtype"]), ], ) def test_xr_dims_ds_sfc_and_pl(kwargs, var_key, variables, dim_keys): + """Test for the internal profile/dimension object. 
Cannot use all the options since + many tasks are performed elsewhere in the engine.""" + # TODO: consider removing this test prof = Profile.make("mars", **kwargs) ds = load_wrapped_fieldlist(DS_DATE_SFC_PL, prof) # prof.update(ds, _attributes(ds)) @@ -324,7 +327,20 @@ def test_xr_dims_ds_sfc_and_pl(kwargs, var_key, variables, dim_keys): "kwargs,dim_keys", [ ( - {"profile": "mars", "time_dim_mode": "raw", "rename_dims": {"levelist": "zz"}}, + { + "profile": "mars", + "time_dim_mode": "raw", + "rename_dims": {"levelist": "zz"}, + "keep_dim_role_names": False, + }, + ["date", "time", "step_timedelta", "zz"], + ), + ( + { + "profile": "mars", + "time_dim_mode": "raw", + "rename_dims": {"level": "zz"}, + }, ["date", "time", "step", "zz"], ), ], @@ -339,3 +355,114 @@ def test_xr_rename_dims(kwargs, dim_keys): for v in ds: compare_dim_order(ds, dim_keys, v) + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dim_keys", + [ + ( + { + "profile": "mars", + "fixed_dims": ["date", "time", "step", "level"], + }, + ["date", "time", "step", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["level", "date", "time", "step"], + }, + ["level", "date", "time", "step"], + ), + ( + { + "profile": "mars", + "fixed_dims": [{"my_date": "date"}, ("my_time", "time"), "step", "level"], + }, + ["my_date", "my_time", "step", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["forecast_reference_time", "endStep", "level"], + }, + ["forecast_reference_time", "endStep", "level"], + ), + ( + { + "profile": "mars", + "fixed_dims": ["forecast_reference_time", ("step", "endStep"), "level"], + }, + ["forecast_reference_time", "step", "level"], + ), + ], +) +def test_xr_fixed_dims(kwargs, dim_keys): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) + ds = ds_ek.to_xarray(**kwargs) + num = len(ds) + + dim_keys = dim_keys + ["latitude", "longitude"] + assert len(ds) == num + + for v in ds: + compare_dim_order(ds, dim_keys, v) + 
+ +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dim_keys", + [ + ( + { + "profile": "mars", + "drop_dims": "number", + "time_dim_mode": "raw", + "squeeze": False, + "keep_dim_role_names": True, + }, + ["date", "time", "step", "level", "level_type"], + ), + ( + { + "profile": "mars", + "drop_dims": ["level_type", "number"], + "time_dim_mode": "raw", + "squeeze": False, + "keep_dim_role_names": True, + }, + ["date", "time", "step", "level"], + ), + ( + { + "profile": "mars", + "drop_dims": "number", + "time_dim_mode": "raw", + "squeeze": False, + "keep_dim_role_names": False, + }, + ["date", "time", "step_timedelta", "levelist", "levtype"], + ), + ( + { + "profile": "mars", + "drop_dims": ["levtype", "number"], + "time_dim_mode": "raw", + "squeeze": False, + "keep_dim_role_names": False, + }, + ["date", "time", "step_timedelta", "levelist"], + ), + ], +) +def test_xr_drop_dims(kwargs, dim_keys): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) + ds = ds_ek.to_xarray(**kwargs) + num = len(ds) + + dim_keys = dim_keys + ["latitude", "longitude"] + assert len(ds) == num + + for v in ds: + compare_dim_order(ds, dim_keys, v) diff --git a/tests/xr_engine/test_xr_engine.py b/tests/xr_engine/test_xr_engine.py index f8d9be3e9..2c40411e7 100644 --- a/tests/xr_engine/test_xr_engine.py +++ b/tests/xr_engine/test_xr_engine.py @@ -20,6 +20,7 @@ here = os.path.dirname(__file__) sys.path.insert(0, here) +from xr_engine_fixtures import compare_coords # noqa: E402 from xr_engine_fixtures import load_grib_data # noqa: E402 @@ -63,12 +64,16 @@ def test_xr_engine_basic(file): @pytest.mark.cache @pytest.mark.parametrize("api", ["earthkit", "xr"]) -def test_xr_engine_detailed_check(api): +def test_xr_engine_detailed_check_1(api): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", "level", "pl.grib")) if api == "earthkit": ds = ds_ek.to_xarray( - time_dim_mode="raw", decode_times=False, 
decode_timedelta=False, add_valid_time_coord=False + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + keep_dim_role_names=False, ) else: import xarray as xr @@ -80,6 +85,7 @@ def test_xr_engine_detailed_check(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, + keep_dim_role_names=False, ) assert ds is not None @@ -92,7 +98,7 @@ def test_xr_engine_detailed_check(api): coords_ref_full = { "date": np.array([20240603, 20240604]), "time": np.array([0, 1200]), - "step": np.array([0, 6]), + "step_timedelta": [0, 6], "levelist": np.array([300, 400, 500, 700, 850, 1000]), "latitude": lats, "longitude": lons, @@ -101,16 +107,14 @@ def test_xr_engine_detailed_check(api): dims_ref_full = { "date": 2, "time": 2, - "step": 2, + "step_timedelta": 2, "levelist": 6, "latitude": 19, "longitude": 36, } assert len(ds.dims) == len(dims_ref_full) - assert len(ds.coords) == len(coords_ref_full) - for k, v in coords_ref_full.items(): - assert np.allclose(ds.coords[k].values, v) + compare_coords(ds, coords_ref_full) assert [v for v in ds.data_vars] == data_vars # data variable @@ -119,47 +123,37 @@ def test_xr_engine_detailed_check(api): assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 19, 36) assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 19, 36) r = ds["u"] - assert len(r.coords) == len(coords_ref_full) - for k, v in coords_ref_full.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref_full) # sel() on dataset r = ds.sel(date=20240603, time=[0, 1200]) coords_ref = dict(coords_ref_full) coords_ref["date"] = np.array([20240603]) - assert len(r.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) assert [v for v in r.data_vars] == data_vars # sel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 19, 36) - r1 = r["u"].sel(step=6, levelist=[1000, 300]) + r1 = 
r["u"].sel(step_timedelta=6, levelist=[1000, 300]) assert r1.shape == (2, 2, 19, 36) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = [6] coords_ref["levelist"] = np.array([1000, 300]) - assert len(r1.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r1.coords[k].values, v) + compare_coords(r1, coords_ref) # isel() on dataset r = ds.isel(date=0, time=[0, 1]) coords_ref = dict(coords_ref_full) coords_ref["date"] = np.array([20240603]) - assert len(r.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) assert [v for v in r.data_vars] == data_vars # isel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 19, 36) - r1 = r["u"].isel(step=1, levelist=[0, -1]) + r1 = r["u"].isel(step_timedelta=1, levelist=[0, -1]) assert r1.shape == (2, 2, 19, 36) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = [6] coords_ref["levelist"] = np.array([300, 1000]) - assert len(r1.coords) == len(coords_ref) - for k, v in coords_ref.items(): - assert np.allclose(r1.coords[k].values, v) + compare_coords(r1, coords_ref) # slicing of data variable da = ds["u"] @@ -173,8 +167,7 @@ def test_xr_engine_detailed_check(api): assert len(r.dims) == len(dims_ref) coords_ref = dict(coords_ref_full) coords_ref["time"] = np.array([0]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) r = da[:, 0, :, 3:5] assert r.shape == (2, 2, 2, 19, 36) @@ -186,8 +179,7 @@ def test_xr_engine_detailed_check(api): coords_ref = dict(coords_ref_full) coords_ref["time"] = np.array([0]) coords_ref["levelist"] = np.array([700, 850]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) r = da.loc[:, 0, :, [700, 850]] assert r.shape == (2, 2, 2, 19, 36) @@ -199,8 +191,184 @@ def test_xr_engine_detailed_check(api): coords_ref = 
dict(coords_ref_full) coords_ref["time"] = np.array([0]) coords_ref["levelist"] = np.array([700, 850]) - for k, v in coords_ref.items(): - assert np.allclose(r.coords[k].values, v) + compare_coords(r, coords_ref) + + # lat-lon + da = ds["t"] + + r = da[:, 0, :, 2, 9, 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + r = da[:, 0, :, 2, 9:12, :2] + assert r.shape == (2, 2, 3, 2) + vals_ref = np.array( + [ + [ + [ + [269.00918579, 269.31680298], + [269.70254517, 269.81387329], + [267.50527954, 266.83828735], + ], + [ + [268.78610229, 268.80758667], + [269.52731323, 269.75680542], + [266.61813354, 267.12106323], + ], + ], + [ + [ + [268.57771301, 269.03767395], + [269.33357239, 269.56111145], + [264.75154114, 266.55036926], + ], + [ + [268.08932495, 268.35983276], + [269.01803589, 269.02389526], + [264.29733276, 266.08248901], + ], + ], + ] + ) + assert np.allclose(r.values, vals_ref) + + r = da.loc[:, 0, :, 500, 0, 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + +@pytest.mark.cache +@pytest.mark.parametrize("api", ["earthkit", "xr"]) +def test_xr_engine_detailed_check_2(api): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", "level", "pl.grib")) + + if api == "earthkit": + ds = ds_ek.to_xarray( + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + keep_dim_role_names=True, + ) + else: + import xarray as xr + + ds = xr.open_dataset( + ds_ek.path, + engine="earthkit", + time_dim_mode="raw", + decode_times=False, + decode_timedelta=False, + add_valid_time_coord=False, + keep_dim_role_names=True, + ) + + assert ds is not None + + # dataset + lats = np.linspace(90, -90, 19) + lons = np.linspace(0, 350, 36) + data_vars = ["r", "t", "u", "v", "z"] + + coords_ref_full = { + 
"date": np.array([20240603, 20240604]), + "time": np.array([0, 1200]), + "step": [0, 6], + "level": np.array([300, 400, 500, 700, 850, 1000]), + "latitude": lats, + "longitude": lons, + } + + dims_ref_full = { + "date": 2, + "time": 2, + "step": 2, + "level": 6, + "latitude": 19, + "longitude": 36, + } + + assert len(ds.dims) == len(dims_ref_full) + compare_coords(ds, coords_ref_full) + assert [v for v in ds.data_vars] == data_vars + + # data variable + assert ds["u"].shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].values.shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 19, 36) + assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 19, 36) + r = ds["u"] + compare_coords(r, coords_ref_full) + + # sel() on dataset + r = ds.sel(date=20240603, time=[0, 1200]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # sel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 19, 36) + r1 = r["u"].sel(step=6, level=[1000, 300]) + assert r1.shape == (2, 2, 19, 36) + coords_ref["step"] = [6] + coords_ref["level"] = np.array([1000, 300]) + compare_coords(r1, coords_ref) + + # isel() on dataset + r = ds.isel(date=0, time=[0, 1]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # isel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 19, 36) + r1 = r["u"].isel(step=1, level=[0, -1]) + assert r1.shape == (2, 2, 19, 36) + coords_ref["step"] = [6] + coords_ref["level"] = np.array([300, 1000]) + compare_coords(r1, coords_ref) + + # slicing of data variable + da = ds["u"] + + r = da[:, 0] + assert r.shape == (2, 2, 6, 19, 36) + assert r.values.shape == (2, 2, 6, 19, 36) + assert r.to_numpy().shape == (2, 2, 6, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) 
== len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + compare_coords(r, coords_ref) + + r = da[:, 0, :, 3:5] + assert r.shape == (2, 2, 2, 19, 36) + assert r.values.shape == (2, 2, 2, 19, 36) + assert r.to_numpy().shape == (2, 2, 2, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + r = da.loc[:, 0, :, [700, 850]] + assert r.shape == (2, 2, 2, 19, 36) + assert r.values.shape == (2, 2, 2, 19, 36) + assert r.to_numpy().shape == (2, 2, 2, 19, 36) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) # lat-lon da = ds["t"] @@ -253,7 +421,7 @@ def test_xr_engine_detailed_check(api): @pytest.mark.parametrize("lazy_load", [False, True]) @pytest.mark.parametrize("release_source", [False, True]) @pytest.mark.parametrize("direct_backend", [False, True]) -def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, direct_backend): +def test_xr_engine_detailed_flatten_check_1(stream, lazy_load, release_source, direct_backend): filename = "test-data/xr_engine/level/pl.grib" ds_ek, ds_ek_ref = load_grib_data(filename, "url", stream=stream) @@ -268,6 +436,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir "lazy_load": lazy_load, "release_source": release_source, "direct_backend": direct_backend, + "keep_dim_role_names": False, } } } @@ -284,7 +453,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir coords_ref_full = { "date": np.array([20240603, 20240604]), "time": np.array([0, 1200]), - "step": np.array([0, 6]), + "step_timedelta": np.array([0, 6]), "levelist": 
np.array([300, 400, 500, 700, 850, 1000]), "latitude": lats, "longitude": lons, @@ -293,7 +462,7 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir dims_ref_full = { "date": 2, "time": 2, - "step": 2, + "step_timedelta": 2, "levelist": 6, "values": 684, } @@ -325,9 +494,9 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir # sel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 684) - r1 = r["u"].sel(step=6, levelist=[1000, 300]) + r1 = r["u"].sel(step_timedelta=6, levelist=[1000, 300]) assert r1.shape == (2, 2, 684) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = np.array([6]) coords_ref["levelist"] = np.array([1000, 300]) assert len(r1.coords) == len(coords_ref) for k, v in coords_ref.items(): @@ -344,9 +513,9 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir # isel() on data variable of filtered dataset assert r["u"].shape == (2, 2, 6, 684) - r1 = r["u"].isel(step=1, levelist=[0, -1]) + r1 = r["u"].isel(step_timedelta=1, levelist=[0, -1]) assert r1.shape == (2, 2, 684) - coords_ref["step"] = np.array([6]) + coords_ref["step_timedelta"] = np.array([6]) coords_ref["levelist"] = np.array([300, 1000]) assert len(r1.coords) == len(coords_ref) for k, v in coords_ref.items(): @@ -426,6 +595,170 @@ def test_xr_engine_detailed_flatten_check(stream, lazy_load, release_source, dir assert np.allclose(r.values, vals_ref) +@pytest.mark.cache +@pytest.mark.parametrize("stream", [False, True]) +@pytest.mark.parametrize("lazy_load", [False, True]) +@pytest.mark.parametrize("release_source", [False, True]) +@pytest.mark.parametrize("direct_backend", [False, True]) +def test_xr_engine_detailed_flatten_check_2(stream, lazy_load, release_source, direct_backend): + filename = "test-data/xr_engine/level/pl.grib" + ds_ek, ds_ek_ref = load_grib_data(filename, "url", stream=stream) + + kwargs = { + "xarray_open_dataset_kwargs": { + 
"backend_kwargs": { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "flatten_values": True, + "add_valid_time_coord": False, + "lazy_load": lazy_load, + "release_source": release_source, + "direct_backend": direct_backend, + "keep_dim_role_names": True, + } + } + } + + ds = ds_ek.to_xarray(**kwargs) + assert ds is not None + + # dataset + ll = ds_ek_ref[0].to_latlon(flatten=True) + lats = ll["lat"] + lons = ll["lon"] + data_vars = ["r", "t", "u", "v", "z"] + + coords_ref_full = { + "date": np.array([20240603, 20240604]), + "time": np.array([0, 1200]), + "step": np.array([0, 6]), + "level": np.array([300, 400, 500, 700, 850, 1000]), + "latitude": lats, + "longitude": lons, + } + + dims_ref_full = { + "date": 2, + "time": 2, + "step": 2, + "level": 6, + "values": 684, + } + + assert len(ds.dims) == len(dims_ref_full) + compare_coords(ds, coords_ref_full) + assert [v for v in ds.data_vars] == data_vars + + # data variable + assert ds["u"].shape == (2, 2, 2, 6, 684) + assert ds["u"].values.shape == (2, 2, 2, 6, 684) + assert ds["u"].as_numpy().shape == (2, 2, 2, 6, 684) + assert ds["u"].to_numpy().shape == (2, 2, 2, 6, 684) + r = ds["u"] + compare_coords(r, coords_ref_full) + + # sel() on dataset + r = ds.sel(date=20240603, time=[0, 1200]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # sel() on data variable of filtered dataset + assert r["u"].shape == (2, 2, 6, 684) + r1 = r["u"].sel(step=6, level=[1000, 300]) + assert r1.shape == (2, 2, 684) + coords_ref["step"] = np.array([6]) + coords_ref["level"] = np.array([1000, 300]) + compare_coords(r1, coords_ref) + + # isel() on dataset + r = ds.isel(date=0, time=[0, 1]) + coords_ref = dict(coords_ref_full) + coords_ref["date"] = np.array([20240603]) + compare_coords(r, coords_ref) + assert [v for v in r.data_vars] == data_vars + + # isel() on data variable of filtered 
dataset + assert r["u"].shape == (2, 2, 6, 684) + r1 = r["u"].isel(step=1, level=[0, -1]) + assert r1.shape == (2, 2, 684) + coords_ref["step"] = np.array([6]) + coords_ref["level"] = np.array([300, 1000]) + compare_coords(r1, coords_ref) + + # slicing of data variable + da = ds["u"] + + r = da[:, 0] + assert r.shape == (2, 2, 6, 684) + assert r.values.shape == (2, 2, 6, 684) + assert r.to_numpy().shape == (2, 2, 6, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + compare_coords(r, coords_ref) + + r = da[:, 0, :, 3:5] + assert r.shape == (2, 2, 2, 684) + assert r.values.shape == (2, 2, 2, 684) + assert r.to_numpy().shape == (2, 2, 2, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + r = da.loc[:, 0, :, [700, 850]] + assert r.shape == (2, 2, 2, 684) + assert r.values.shape == (2, 2, 2, 684) + assert r.to_numpy().shape == (2, 2, 2, 684) + dims_ref = dict(dims_ref_full) + dims_ref.pop("time") + assert len(r.dims) == len(dims_ref) + coords_ref = dict(coords_ref_full) + coords_ref["time"] = np.array([0]) + coords_ref["level"] = np.array([700, 850]) + compare_coords(r, coords_ref) + + # level=500, lat=0, lon=0 + da = ds["t"] + + r = da[:, 0, :, 2, 9 * 36 + 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + r = da[:, 0, :, 2, [9 * 36, 10 * 36, 11 * 36]] + assert r.shape == (2, 2, 3) + vals_ref = np.array( + [ + [ + [269.00918579, 269.70254517, 267.50527954], + [268.78610229, 269.52731323, 266.61813354], + ], + [ + [268.57771301, 269.33357239, 264.75154114], + [268.08932495, 269.01803589, 264.29733276], + ], + ] + ) + + v_ek = 
ds_ek_ref.sel(param="t", time=0, levelist=500).to_numpy(flatten=True) + assert np.allclose(r.values.flatten(), v_ek[:, [9 * 36, 10 * 36, 11 * 36]].flatten()) + assert np.allclose(r.values, vals_ref) + + r = da.loc[:, 0, :, 500, 9 * 36 + 0] + assert r.shape == (2, 2) + vals_ref = np.array([[269.00918579, 268.78610229], [268.57771301, 268.08932495]]) + assert np.allclose(r.values, vals_ref) + + @pytest.mark.cache @pytest.mark.parametrize( "kwargs", diff --git a/tests/xr_engine/test_xr_level.py b/tests/xr_engine/test_xr_level.py index 2f3f4b91e..1e0b36591 100644 --- a/tests/xr_engine/test_xr_level.py +++ b/tests/xr_engine/test_xr_level.py @@ -28,11 +28,11 @@ "kwargs,dims", [ ( - {"profile": "mars", "level_dim_mode": "level"}, + {"profile": "mars", "level_dim_mode": "level", "keep_dim_role_names": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, ), ( - {"profile": "mars", "level_dim_mode": "level_and_type"}, + {"profile": "mars", "level_dim_mode": "level_and_type", "keep_dim_role_names": False}, {"level_and_type": ["1000pl", "300pl", "400pl", "500pl", "700pl", "850pl"]}, ), ], @@ -50,67 +50,117 @@ def test_xr_level_dim(kwargs, dims): [ ( "pl.grib", - {"profile": "grib", "level_dim_mode": "level"}, + {"profile": "grib", "level_dim_mode": "level", "keep_dim_role_names": False}, {"level": [300, 400, 500, 700, 850, 1000]}, "isobaricInhPa", ), ( "pl_80_Pa.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [80]}, "isobaricInPa", ), ( "hpa_and_pa.grib", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [0.01, 0.1, 1]}, "pl", ), ( "hl_1000_m_asl.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + 
"level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [100, 1000, 2000, 3000]}, "heightAboveSea", ), ( "hl_1000_m_agr.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [500, 1000, 2500, 10000]}, "heightAboveGround", ), ( "pt_320_K.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [320]}, "theta", ), ( "pv_1500.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [1500]}, "potentialVorticity", ), ( "soil_7.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [7]}, "depthBelowLand", ), ( "sol_3.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [3]}, "snowLayer", ), ( "ml_77.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [77]}, "hybrid", ), ( "sfc.grib1", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [0]}, "surface", ), @@ -122,49 +172,84 @@ def test_xr_level_dim(kwargs, dims): # ), ( "mean_sea_level_reduced_ll.grib1", - {"profile": 
"grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [0]}, "meanSea", ), ( "gen_vert_layer.grib", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "level"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "level", + "keep_dim_role_names": False, + }, {"level": [1]}, "generalVerticalLayer", ), ( "pl.grib", - {"profile": "mars", "level_dim_mode": "level"}, + {"profile": "mars", "level_dim_mode": "level", "keep_dim_role_names": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, "pl", ), ( "pl_80_Pa.grib2", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [0.8]}, "pl", ), ( "pt_320_K.grib1", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [320]}, "pt", ), ( "pv_1500.grib1", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [1500]}, "pv", ), ( "sol_3.grib2", - {"profile": "grib", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "grib", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [3]}, "sol", ), ( "hpa_and_pa.grib", - {"profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist"}, + { + "profile": "mars", + "level_dim_mode": "level", + "ensure_dims": "levelist", + "keep_dim_role_names": False, + }, {"levelist": [0.01, 0.1, 1]}, "pl", ), diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index 
f81dc583e..48936c140 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -26,22 +26,37 @@ @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( - {"time_dim_mode": "raw", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, {"date": [20240603, 20240604], "time": [0, 1200], "step": [0, 6]}, + ("step", "hours"), ), ( - {"time_dim_mode": "raw"}, + { + "time_dim_mode": "raw", + "keep_dim_role_names": True, + }, { "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], "time": [np.timedelta64(0, "s"), np.timedelta64(43200, "s")], "step": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], }, + None, ), ( - {"time_dim_mode": "forecast", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "forecast", + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, { "forecast_reference_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -51,9 +66,13 @@ ], "step": [0, 6], }, + ("step", "hours"), ), ( - {"time_dim_mode": "forecast"}, + { + "time_dim_mode": "forecast", + "keep_dim_role_names": True, + }, { "forecast_reference_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -63,9 +82,15 @@ ], "step": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], }, + None, ), ( - {"time_dim_mode": "valid_time", "decode_times": False, "decode_timedelta": False}, + { + "time_dim_mode": "valid_time", + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, { "valid_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -78,9 +103,15 @@ np.datetime64("2024-06-04T18", "ns"), ], }, + None, ), ( - {"time_dim_mode": "valid_time", "decode_times": True, "decode_timedelta": True}, + { + "time_dim_mode": "valid_time", + "decode_times": True, + "decode_timedelta": True, + "keep_dim_role_names": True, + }, { 
"valid_time": [ np.datetime64("2024-06-03T00", "ns"), @@ -93,25 +124,54 @@ np.datetime64("2024-06-04T18", "ns"), ], }, + None, + ), + ( + { + "time_dim_mode": "raw", + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": False, + }, + {"date": [20240603, 20240604], "time": [0, 1200], "step_timedelta": [0, 6]}, + ("step_timedelta", "hours"), + ), + ( + { + "time_dim_mode": "raw", + "keep_dim_role_names": False, + }, + { + "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], + "time": [np.timedelta64(0, "s"), np.timedelta64(43200, "s")], + "step_timedelta": [np.timedelta64(0, "h"), np.timedelta64(6, "h")], + }, + None, ), ], ) -def test_xr_time_basic(kwargs, dims): +def test_xr_time_basic(kwargs, dims, step_units): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl.grib")) ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( { "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "keep_dim_role_names": False, }, { "indexing_time": [ @@ -120,12 +180,14 @@ def test_xr_time_basic(kwargs, dims): ], "forecastMonth": [1, 2, 3], }, + ("forecastMonth", "months"), ), ( { "dim_roles": {"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "keep_dim_role_names": False, }, { "indexing_time": [ @@ -134,10 +196,43 @@ def test_xr_time_basic(kwargs, dims): ], "forecastMonth": [1, 2, 3], }, + ("forecastMonth", "months"), + ), + ( + { + "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, + "decode_times": False, + 
"decode_timedelta": False, + "keep_dim_role_names": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2014-09-01", "ns"), + np.datetime64("2014-10-01", "ns"), + ], + "step": [1, 2, 3], + }, + ("step", "months"), + ), + ( + { + "dim_roles": {"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2014-09-01", "ns"), + np.datetime64("2014-10-01", "ns"), + ], + "step": [1, 2, 3], + }, + ("step", "months"), ), ], ) -def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): +def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): ds_ek = from_source( "url", earthkit_remote_test_data_file("test-data/xr_engine/date/jma_seasonal_fc_ref_time_per_member.grib"), @@ -146,10 +241,15 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="2t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,dims", + "kwargs,dims,step_units", [ ( { @@ -157,6 +257,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "dim_roles": {"step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, + "keep_dim_role_names": False, }, { "number": [0, 1, 2], @@ -168,6 +269,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ], "forecastMonth": [1, 2, 3, 4, 5, 6], }, + ("forecastMonth", "months"), ), ( { @@ -175,6 +277,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "dim_roles": {"step": "fcmonth"}, "decode_times": False, "decode_timedelta": False, + "keep_dim_role_names": False, }, { "number": [0, 1, 2], @@ -186,6 +289,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): ], 
"fcmonth": [1, 2, 3, 4, 5, 6], }, + ("fcmonth", "months"), ), ( { @@ -194,6 +298,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "decode_times": False, "decode_timedelta": False, "ensure_dims": ["number", "date", "time", "forecastMonth"], + "keep_dim_role_names": False, }, { "number": [0, 1, 2], @@ -206,10 +311,73 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims): "time": [np.timedelta64(0, "s")], "forecastMonth": [1, 2, 3, 4, 5, 6], }, + ("forecastMonth", "months"), + ), + ( + { + "time_dim_mode": "forecast", + "dim_roles": {"step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, + { + "number": [0, 1, 2], + "forecast_reference_time": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), + ), + ( + { + "time_dim_mode": "forecast", + "dim_roles": {"step": "fcmonth"}, + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, + { + "number": [0, 1, 2], + "forecast_reference_time": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), + ), + ( + { + "time_dim_mode": "raw", + "dim_roles": {"step": "forecastMonth"}, + "decode_times": False, + "decode_timedelta": False, + "ensure_dims": ["number", "date", "time", "step"], + "keep_dim_role_names": True, + }, + { + "number": [0, 1, 2], + "date": [ + np.datetime64("1993-10-01", "ns"), + np.datetime64("1994-10-01", "ns"), + np.datetime64("1995-10-01", "ns"), + np.datetime64("1996-10-01", "ns"), + ], + "time": [np.timedelta64(0, "s")], + "step": [1, 2, 3, 4, 5, 6], + }, + ("step", "months"), ), ], ) -def test_xr_time_seasonal_monthly_simple(kwargs, dims): +def 
test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): ds_ek = from_source( "url", earthkit_remote_test_data_file("test-data/xr_engine/date/seasonal_monthly.grib"), @@ -218,6 +386,11 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims): ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="2t") + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + @pytest.mark.cache def test_xr_valid_time_coord(): @@ -226,9 +399,15 @@ def test_xr_valid_time_coord(): ) ds = ds_ek.to_xarray( - time_dim_mode="forecast", add_valid_time_coord=True, decode_times=False, decode_timedelta=False + time_dim_mode="forecast", + add_valid_time_coord=True, + decode_times=False, + decode_timedelta=False, + keep_dim_role_names=True, ) + print(ds) + dims = { "forecast_reference_time": [ np.datetime64("2024-06-03T00", "ns"), diff --git a/tests/xr_engine/xr_engine_fixtures.py b/tests/xr_engine/xr_engine_fixtures.py index 713f95ef4..208819f43 100644 --- a/tests/xr_engine/xr_engine_fixtures.py +++ b/tests/xr_engine/xr_engine_fixtures.py @@ -90,7 +90,7 @@ def compare_coord(ds, name, ref_vals, mode="coord"): assert np.allclose(ds.coords[name].values, vals), f"{name=} {ds.coords[name].values} != {vals}" -def compare_dim_order(ds, dims, order_ref_var): +def compare_dim_order(ds, dims, order_ref_var, check_coord=True): if order_ref_var is None: return @@ -98,6 +98,8 @@ def compare_dim_order(ds, dims, order_ref_var): for d in ds[order_ref_var].dims: if d in dims: dim_order.append(d) + if check_coord: + assert d in ds.coords, f"{d} not in {ds.coords}" if isinstance(dims, dict): assert dim_order == list(dims.keys()), f"{dim_order=} != {list(dims.keys())}" From 3cfabf6b2597fd13792322eddffc01a496dbbcc7 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 6 Jun 2025 15:39:58 +0100 Subject: [PATCH 2/9] Xarray step range --- src/earthkit/data/indexing/tensor.py | 
4 +- tests/xr_engine/test_xr_time.py | 99 ++++++++++++++++++++++------ 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/src/earthkit/data/indexing/tensor.py b/src/earthkit/data/indexing/tensor.py index 568c7ddf1..e706e84c7 100644 --- a/src/earthkit/data/indexing/tensor.py +++ b/src/earthkit/data/indexing/tensor.py @@ -497,10 +497,12 @@ def make_valid_datetime(self, dims, dtype="datetime64[ns]"): print(f"{keys=}") for dims in dims_opt: if all(d in keys for d in dims): + print("Found dims:", dims) # use same dim order as in user_dims + dims = [keys[d] for d in dims] dims = [d for d in dims if d in self.user_dims] other_dims = [d for d in self.user_dims if d not in dims] - # print(f"{dims=} {other_dims=}") + print(f"{dims=} {other_dims=}") if other_dims: import datetime diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index 48936c140..26b968f14 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -393,36 +393,95 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): @pytest.mark.cache -def test_xr_valid_time_coord(): +@pytest.mark.parametrize( + "kwargs,dims,step_units,coords", + [ + ( + { + "time_dim_mode": "forecast", + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": True, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + "step": [0, 6], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "fixed_dims": ["level", "forecast_reference_time", "step"], + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + 
"step": [0, 6], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "fixed_dims": ["level", "step", "forecast_reference_time"], + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + }, + { + "step": [0, 6], + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + }, + ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ], +) +def test_xr_valid_time_coord(kwargs, dims, step_units, coords): ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl_small.grib")).sel( date=20240603, time=[0, 1200] ) - ds = ds_ek.to_xarray( - time_dim_mode="forecast", - add_valid_time_coord=True, - decode_times=False, - decode_timedelta=False, - keep_dim_role_names=True, - ) + ds = ds_ek.to_xarray(**kwargs) print(ds) - dims = { - "forecast_reference_time": [ - np.datetime64("2024-06-03T00", "ns"), - np.datetime64("2024-06-03T12", "ns"), - ], - "step": [0, 6], - } compare_dims(ds, dims, order_ref_var="t") vt = ds.coords["valid_time"] assert vt.dims == ("forecast_reference_time", "step") - ref = [ - [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], - [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], - ] + # ref = [ + # [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], + # [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], + # ] - compare_coords(ds, {"valid_time": ref}) + compare_coords(ds, coords) From 3158bbd0296b6db7d55700233ea4b893d559c76d Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 9 Jun 2025 14:40:28 
+0100 Subject: [PATCH 3/9] Handle time range --- docs/examples/grib_to_file_target.ipynb | 6 +- docs/examples/list_of_dicts_to_xarray.ipynb | 1930 ++++++++--------- docs/examples/xarray_engine_chunks.ipynb | 34 +- docs/examples/xarray_engine_field_dims.ipynb | 70 +- docs/examples/xarray_engine_level.ipynb | 394 ++-- docs/examples/xarray_engine_overview.ipynb | 75 +- docs/examples/xarray_engine_seasonal.ipynb | 632 +++++- docs/examples/xarray_engine_split.ipynb | 20 +- docs/examples/xarray_engine_temporal.ipynb | 307 +-- docs/examples/xarray_engine_to_grib.ipynb | 68 +- .../examples/xarray_engine_variable_key.ipynb | 309 +-- docs/release_notes/version_0.15_updates.rst | 27 + src/earthkit/data/indexing/tensor.py | 75 +- src/earthkit/data/utils/xarray/dim.py | 4 +- src/earthkit/data/utils/xarray/profile.py | 3 - tests/xr_engine/test_xr_dims.py | 18 + tests/xr_engine/test_xr_remapping.py | 46 +- tests/xr_engine/test_xr_split.py | 36 +- tests/xr_engine/test_xr_time.py | 37 +- tests/xr_engine/test_xr_write.py | 1 + 20 files changed, 2436 insertions(+), 1656 deletions(-) diff --git a/docs/examples/grib_to_file_target.ipynb b/docs/examples/grib_to_file_target.ipynb index c37ffa05f..97bcf1dad 100644 --- a/docs/examples/grib_to_file_target.ipynb +++ b/docs/examples/grib_to_file_target.ipynb @@ -442,9 +442,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -456,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/list_of_dicts_to_xarray.ipynb b/docs/examples/list_of_dicts_to_xarray.ipynb index 955d60420..322f3097c 100644 --- a/docs/examples/list_of_dicts_to_xarray.ipynb +++ b/docs/examples/list_of_dicts_to_xarray.ipynb @@ -1,968 +1,968 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": 
"ee0f0104-8077-45f1-9746-58f29b64db92", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "ee0f0104-8077-45f1-9746-58f29b64db92", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## list-of-dict: converting to Xarray" + ] + }, + { + "cell_type": "raw", + "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." + ] + }, + { + "cell_type": "markdown", + "id": "2e087423-8c96-49b4-984c-f15472fa8381", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data containing geography" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 248B\n",
+                            "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
+                            "Coordinates:\n",
+                            "  * levelist   (levelist) int64 16B 500 850\n",
+                            "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
+                            "  * longitude  (longitude) float64 16B 20.0 40.0\n",
+                            "Data variables:\n",
+                            "    t          (levelist, latitude, longitude) float64 96B ...\n",
+                            "    u          (levelist, latitude, longitude) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 248B\n", + "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", + " * longitude (longitude) float64 16B 20.0 40.0\n", + "Data variables:\n", + " t (levelist, latitude, longitude) float64 96B ...\n", + " u (levelist, latitude, longitude) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "\n", + "prototype = {\n", + " \"latitudes\": [10.0, 0.0, -10.0],\n", + " \"longitudes\": [20, 40.0],\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Data without geography" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+                            "Dimensions:   (levelist: 2, values: 6)\n",
+                            "Coordinates:\n",
+                            "  * levelist  (levelist) int64 16B 500 850\n",
+                            "Dimensions without coordinates: values\n",
+                            "Data variables:\n",
+                            "    t         (levelist, values) float64 96B ...\n",
+                            "    u         (levelist, values) float64 96B ...\n",
+                            "Attributes:\n",
+                            "    Conventions:  CF-1.8\n",
+                            "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (levelist: 2, values: 6)\n", + "Coordinates:\n", + " * levelist (levelist) int64 16B 500 850\n", + "Dimensions without coordinates: values\n", + "Data variables:\n", + " t (levelist, values) float64 96B ...\n", + " u (levelist, values) float64 96B ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prototype = {\n", + " \"values\": [1, 2, 3, 4, 5, 6],\n", + " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", + " }\n", + "\n", + "d = [\n", + " {\"param\": \"t\", \"level\": 500, **prototype},\n", + " {\"param\": \"t\", \"level\": 850, **prototype},\n", + " {\"param\": \"u\", \"level\": 500, **prototype},\n", + " {\"param\": \"u\", \"level\": 850, **prototype},\n", + " ]\n", + "\n", + "ds = ekd.from_source(\"list-of-dicts\", d)\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6463409-7686-4d90-8cab-00a04b7119bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## list-of-dict: converting to Xarray" - ] - }, - { - "cell_type": "raw", - "id": "6cadbfbf-c7af-4927-8927-c320d9160c4f", - "metadata": { - "editable": true, - "raw_mimetype": "text/x-rst", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "This example demonstrates how :ref:`data-sources-lod` fieldlists can be converted into Xarray." 
- ] - }, - { - "cell_type": "markdown", - "id": "2e087423-8c96-49b4-984c-f15472fa8381", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data containing geography" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1e5ebf7a-2fc6-453a-9e14-6b04b5135810", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 248B\n",
-       "Dimensions:    (levelist: 2, latitude: 3, longitude: 2)\n",
-       "Coordinates:\n",
-       "  * levelist   (levelist) int64 16B 500 850\n",
-       "  * latitude   (latitude) float64 24B 10.0 0.0 -10.0\n",
-       "  * longitude  (longitude) float64 16B 20.0 40.0\n",
-       "Data variables:\n",
-       "    t          (levelist, latitude, longitude) float64 96B ...\n",
-       "    u          (levelist, latitude, longitude) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 248B\n", - "Dimensions: (levelist: 2, latitude: 3, longitude: 2)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - " * latitude (latitude) float64 24B 10.0 0.0 -10.0\n", - " * longitude (longitude) float64 16B 20.0 40.0\n", - "Data variables:\n", - " t (levelist, latitude, longitude) float64 96B ...\n", - " u (levelist, latitude, longitude) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import earthkit.data as ekd\n", - "\n", - "prototype = {\n", - " \"latitudes\": [10.0, 0.0, -10.0],\n", - " \"longitudes\": [20, 40.0],\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "markdown", - "id": "94b46ec8-614b-480a-8ffe-0b1dd4e344bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "#### Data without geography" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7ea3d8bf-a432-4aef-94d9-5ac0c6b19503", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 208B\n",
-       "Dimensions:   (levelist: 2, values: 6)\n",
-       "Coordinates:\n",
-       "  * levelist  (levelist) int64 16B 500 850\n",
-       "Dimensions without coordinates: values\n",
-       "Data variables:\n",
-       "    t         (levelist, values) float64 96B ...\n",
-       "    u         (levelist, values) float64 96B ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 208B\n", - "Dimensions: (levelist: 2, values: 6)\n", - "Coordinates:\n", - " * levelist (levelist) int64 16B 500 850\n", - "Dimensions without coordinates: values\n", - "Data variables:\n", - " t (levelist, values) float64 96B ...\n", - " u (levelist, values) float64 96B ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "prototype = {\n", - " \"values\": [1, 2, 3, 4, 5, 6],\n", - " \"valid_datetime\": \"2018-08-01T09:00:00Z\",\n", - " }\n", - "\n", - "d = [\n", - " {\"param\": \"t\", \"level\": 500, **prototype},\n", - " {\"param\": \"t\", \"level\": 850, **prototype},\n", - " {\"param\": \"u\", \"level\": 500, **prototype},\n", - " {\"param\": \"u\", \"level\": 850, **prototype},\n", - " ]\n", - "\n", - "ds = ekd.from_source(\"list-of-dicts\", d)\n", - "ds.to_xarray()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6463409-7686-4d90-8cab-00a04b7119bb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/xarray_engine_chunks.ipynb b/docs/examples/xarray_engine_chunks.ipynb index 39eb8f6f9..37b9993f4 100644 --- a/docs/examples/xarray_engine_chunks.ipynb +++ b/docs/examples/xarray_engine_chunks.ipynb @@ -19,7 +19,7 @@ "id": "b42eccf8-abcc-44a1-8406-f8aa966b1bf5", "metadata": { "editable": true, - "raw_mimetype": "text/x-rst", 
+ "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, @@ -58,7 +58,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "98299fdfafa74aa5b8cbc0f95188b8d5", + "model_id": "5bd1390ded1949169dbeeaf1f017ad75", "version_major": 2, "version_minor": 0 }, @@ -494,7 +494,7 @@ "Attributes:\n", " standard_name: air_temperature\n", " long_name: 2 metre temperature\n", - " units: K" + " dtype='float64', name='longitude'))
  • standard_name :
    air_temperature
    long_name :
    2 metre temperature
    units :
    K
  • " ], "text/plain": [ " Size: 2MB\n", @@ -667,7 +667,7 @@ "tags": [] }, "source": [ - "We compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." + "Finally, we compute the mean along the temporal dimension. Xarray will load data in chunks for this computation keeping the memory usage low." ] }, { @@ -1099,7 +1099,7 @@ " 227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n", "Coordinates:\n", " * latitude (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n", - " * longitude (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0
  • " ], "text/plain": [ " Size: 2kB\n", diff --git a/docs/examples/xarray_engine_field_dims.ipynb b/docs/examples/xarray_engine_field_dims.ipynb index 082335403..36bb29093 100644 --- a/docs/examples/xarray_engine_field_dims.ipynb +++ b/docs/examples/xarray_engine_field_dims.ipynb @@ -75,12 +75,26 @@ "outputs": [ { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9d15f57ec3eb42f19a24d4fd6774fb79", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "(Frozen({'forecast_reference_time': 4, 'step': 2, 'levelist': 2, 'latitude': 19, 'longitude': 36}),\n", + "pl.grib: 0%| | 0.00/48.8k [00:00` or :ref:`\"grib\" ` we have the following mappings:\n", + "We can convert a GRIB fieldlist to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. This notebook discusses the **level** options used by this method.\n", "\n", - "- level role: \"level\"\n", - "- level_type role: \"typeOfLevel\"\n", + "The level dimension is based on the ``dim_roles`` and ``level_dim_mode`` options. The ``dim_roles`` are a mapping between predefined dimension roles and metadata keys used to build the given dimensions. With regards to the levels the \"level\" and \"level_type\" roles are the ones we need to consider. When ``profile`` is :ref:`None ` or :ref:`\"grib\" ` we have the following mappings:\n", + " \n", + " - level role: \"level\"\n", + " - level_type role: \"typeOfLevel\"\n", "\n", "When ``profile`` is :ref:`\"mars\" ` the roles are defined as follows:\n", - "\n", - "- level role: \"levelist\"\n", - "- level_type role: \"levtype\"\n" + " \n", + " - level role: \"levelist\"\n", + " - level_type role: \"levtype\"" ] }, { @@ -56,14 +58,16 @@ "id": "a477dd3d-e98f-43f2-99f6-628c2bd8cd6f", "metadata": { "editable": true, - "raw_mimetype": "text/restructuredtext", + "raw_mimetype": "", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ - "When ``level_dim_mode=\"level\"`` the level role defines the level dimension. 
Since the default :ref:`profile ` is :ref:`\"mars\" ` in the example below the level dimension will be derived from the \"levelist\" key." + "When ``level_dim_mode=\"level\"`` the level role defines the level dimension. Since the default :ref:`profile ` is :ref:`\"mars\" ` in the example below the level dimension will be derived from the \"levelist\" key. \n", + "\n", + "By default, the dimensions related to dimension roles are named after the roles. So, although the level dimension was generated from the \"levelist\" GRIB key the dimension name is still \"level\". " ] }, { @@ -78,6 +82,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "59d6167a6800496ab715b55fd46c41e0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00 span {\n", @@ -286,15 +311,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -444,18 +469,18 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 500 700\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -467,34 +492,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * levelist (levelist) int64 500 700\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -581,13 +606,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -632,7 +658,7 @@ ".xr-sections {\n", " padding-left: 0 
!important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -640,7 +666,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -652,6 +680,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -673,7 +705,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -684,7 +716,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -756,15 +788,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -914,48 +946,48 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 176kB\n",
            "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * level                    (level) int64 500 700\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 ...\n",
    -       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 176kB\n", "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * level (level) int64 500 700\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, level, latitude, longitude) float64 ...\n", - " t (forecast_reference_time, step, level, latitude, longitude) float64 ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", " institution: ECMWF" @@ -1005,6 +1037,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "56b17d5515a14844859d8269f559d026", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_ml.grib: 0%| | 0.00/176k [00:00 span {\n", @@ -1213,15 +1266,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1371,18 +1424,18 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 351kB\n",
            "Dimensions:                  (forecast_reference_time: 4, step: 2,\n",
            "                              level_and_type: 4, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * level_and_type           (level_and_type) <U5 '137ml' '500pl' '700pl' '90ml'\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level_and_type           (level_and_type) <U5 80B '137ml' '500pl' ... '90ml'\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    t                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 ...\n",
    -       "    u                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 ...\n",
    +       "    t                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 175kB ...\n",
    +       "    u                        (forecast_reference_time, step, level_and_type, latitude, longitude) float64 175kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -1394,34 +1447,34 @@
            "    domain:       g\n",
            "    levelist:     137\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    ml
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    levelist :
    137
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 351kB\n", "Dimensions: (forecast_reference_time: 4, step: 2,\n", " level_and_type: 4, latitude: 19, longitude: 36)\n", "Coordinates:\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * level_and_type (level_and_type) span {\n", @@ -1697,15 +1771,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1855,64 +1929,64 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    +       "
    <xarray.Dataset> Size: 1MB\n",
            "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
            "                              surface: 1, latitude: 19, longitude: 36,\n",
            "                              isobaricInhPa: 6)\n",
            "Coordinates:\n",
    -       "  * number                   (number) int64 0\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 2024-06...\n",
    -       "  * step                     (step) timedelta64[ns] 00:00:00 06:00:00\n",
    -       "  * surface                  (surface) int64 0\n",
    -       "  * isobaricInhPa            (isobaricInhPa) int64 300 400 500 700 850 1000\n",
    -       "  * latitude                 (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * surface                  (surface) int64 8B 0\n",
    +       "  * isobaricInhPa            (isobaricInhPa) int64 48B 300 400 500 700 850 1000\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    2t                       (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n",
    -       "    msl                      (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n",
    -       "    r                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    t                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    u                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    v                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    -       "    z                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n",
    +       "    2t                       (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n",
    +       "    msl                      (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n",
    +       "    r                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    u                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    v                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
    +       "    z                        (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ - "\n", + " Size: 1MB\n", "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", " surface: 1, latitude: 19, longitude: 36,\n", " isobaricInhPa: 6)\n", "Coordinates:\n", - " * number (number) int64 0\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 2024-06...\n", - " * step (step) timedelta64[ns] 00:00:00 06:00:00\n", - " * surface (surface) int64 0\n", - " * isobaricInhPa (isobaricInhPa) int64 300 400 500 700 850 1000\n", - " * latitude (latitude) float64 90.0 80.0 70.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 0.0 10.0 20.0 ... 340.0 350.0\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * surface (surface) int64 8B 0\n", + " * isobaricInhPa (isobaricInhPa) int64 48B 300 400 500 700 850 1000\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 
340.0 350.0\n", "Data variables:\n", - " 2t (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n", - " msl (number, forecast_reference_time, step, surface, latitude, longitude) float64 ...\n", - " r (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " t (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " u (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " v (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", - " z (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 ...\n", + " 2t (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n", + " msl (number, forecast_reference_time, step, surface, latitude, longitude) float64 44kB ...\n", + " r (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " t (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " u (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " v (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", + " z (number, forecast_reference_time, step, isobaricInhPa, latitude, longitude) float64 263kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", " institution: ECMWF" @@ -1946,9 +2020,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1960,7 +2034,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_overview.ipynb b/docs/examples/xarray_engine_overview.ipynb index 
0d1fe3615..0d256cdb5 100644 --- a/docs/examples/xarray_engine_overview.ipynb +++ b/docs/examples/xarray_engine_overview.ipynb @@ -57,7 +57,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "8b2a598b3f264e2aa75a0cddab1650d2", "version_major": 2, "version_minor": 0 }, @@ -115,13 +115,6 @@ "tags": [] }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. Use engine=`cfgrib` to invoke the cfgrib engine.\n" - ] - }, { "data": { "text/html": [ @@ -155,14 +148,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -217,6 +210,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -253,7 +247,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -264,7 +258,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -336,15 +330,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " 
content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -495,17 +489,17 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -517,34 +511,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -820,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "5ad32a3e-2f48-49f5-b207-15e89c397fba", "metadata": { "editable": true, @@ -833,17 +827,18 @@ { "data": { "text/plain": [ - "255.25649845948692" + "(254.25649845948692, 255.25649845948692)" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds_fl.sel(param=\"t\", step=6, level=500)[0].values.mean(), \n", - "ds_fl1.sel(param=\"t\", step=6, level=500)[0].values.mean()" + "m_0 = ds_fl.sel(param=\"t\", step=6, level=500)[0].values.mean() \n", + "m_1 = ds_fl1.sel(param=\"t\", step=6, level=500)[0].values.mean()\n", + "m_0, m_1" ] }, { @@ -1189,9 +1184,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1203,7 +1198,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_seasonal.ipynb b/docs/examples/xarray_engine_seasonal.ipynb index 2c8c55528..0727d75d5 100644 --- a/docs/examples/xarray_engine_seasonal.ipynb +++ b/docs/examples/xarray_engine_seasonal.ipynb @@ -25,7 +25,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "653a95e071ca4633aadbe42f597676a9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "seasonal_monthly.grib: 0%| | 0.00/160k [00:00\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 395kB\n",
    +       "Dimensions:                  (number: 3, forecast_reference_time: 4, step: 6,\n",
    +       "                              latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number                   (number) int64 24B 0 1 2\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    +       "  * step                     (step) int64 48B 1 2 3 4 5 6\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    2t                       (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n",
    +       "Attributes: (12/15)\n",
    +       "    param:        2t\n",
    +       "    paramId:      167\n",
    +       "    class:        c3\n",
    +       "    stream:       msmm\n",
    +       "    levtype:      sfc\n",
    +       "    type:         fcmean\n",
    +       "    ...           ...\n",
    +       "    fcmonth:      1\n",
    +       "    origin:       lfpw\n",
    +       "    domain:       g\n",
    +       "    method:       1\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 395kB\n", + "Dimensions: (number: 3, forecast_reference_time: 4, step: 6,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " 2t (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n", + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "e917dbc1-ba05-4180-b1d8-62e04bf98d50", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we check the \"step\" dimension we can see its units are \"months\"." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "850836de-db60-48ac-b42b-253ab335ceef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", "output_type": "stream", "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. 
Use engine=`cfgrib` to invoke the cfgrib engine.\n" + " Size: 48B\n", + "array([1, 2, 3, 4, 5, 6])\n", + "Coordinates:\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + "Attributes:\n", + " units: months\n" ] + } + ], + "source": [ + "print(ds[\"step\"])" + ] + }, + { + "cell_type": "raw", + "id": "13b0fdb1-ed1f-4a0a-b77f-71115adf40ad", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" }, + "tags": [] + }, + "source": [ + "By default, the dimensions related to dimension roles are named after the roles. So, although the step dimension was generated from the \"forecastMonth\" GRIB key the dimension name is still \"step\". To override this use the ``keep_dim_role_name=False`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cbf6d822-546e-42ab-a8c7-ad8d7d0c61fc", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -247,14 +762,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -309,6 +824,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -345,7 +861,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 
15px;\n", " text-align: center;\n", @@ -356,7 +872,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -428,15 +944,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -597,31 +1113,31 @@ " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", - "Attributes: (12/17)\n", - " param: 2t\n", - " standard_name: unknown\n", - " long_name: 2 metre temperature\n", - " paramId: 167\n", - " class: c3\n", - " stream: msmm\n", - " ... ...\n", - " fcmonth: 1\n", - " origin: lfpw\n", - " domain: g\n", - " method: 1\n", - " Conventions: CF-1.8\n", - " institution: ECMWF
  • param :
    2t
    paramId :
    167
    class :
    c3
    stream :
    msmm
    levtype :
    sfc
    type :
    fcmean
    expver :
    0001
    date :
    19931001
    time :
    0
    fcmonth :
    1
    origin :
    lfpw
    domain :
    g
    method :
    1
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 395kB\n", @@ -635,52 +1151,40 @@ " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", - "Attributes: (12/17)\n", - " param: 2t\n", - " standard_name: unknown\n", - " long_name: 2 metre temperature\n", - " paramId: 167\n", - " class: c3\n", - " stream: msmm\n", - " ... ...\n", - " fcmonth: 1\n", - " origin: lfpw\n", - " domain: g\n", - " method: 1\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", dim_roles={\"step\": \"forecastMonth\"})\n", + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"}, \n", + " keep_dim_role_names=False)\n", "ds" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6eba63a3-452b-4317-aa7d-7793e4a1dd2a", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -692,7 +1196,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_split.ipynb b/docs/examples/xarray_engine_split.ipynb index 6c77736b0..787e734e3 100644 --- a/docs/examples/xarray_engine_split.ipynb +++ 
b/docs/examples/xarray_engine_split.ipynb @@ -44,7 +44,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8e0c9b139cf64468a0126a7780bf54e0", + "model_id": "6c861eb4640740fab0436a6893174a95", "version_major": 2, "version_minor": 0 }, @@ -59,7 +59,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dimension 'typeOfLevel' of variable 't' cannot have multiple values=['hybrid', 'isobaricInhPa']\n" + "Dimension 'level_type' of variable 't' cannot have multiple values=['hybrid', 'isobaricInhPa']\n" ] } ], @@ -84,7 +84,7 @@ "tags": [] }, "source": [ - "In this case we can use the ``split_dims`` option to split the hypercube along the problematic dimensions. The results a tuple of two lists: \n", + "In this case we can use the ``split_dims`` option to split the hypercube along the problematic dimensions. ``split_dims`` does not use dimension names but takes one or more GRIB keys to perform the splitting on. The result is a tuple of two lists: \n", "\n", "- the first list contains the Xarray datasets\n", "- the second list contains the corresponding dictionaries with the spitting keys/values (one dictionary per dataset)\n", @@ -527,20 +527,20 @@ " u (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", " institution: ECMWF
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", diff --git a/docs/examples/xarray_engine_temporal.ipynb b/docs/examples/xarray_engine_temporal.ipynb index 1b1a76d57..b3c8589e7 100644 --- a/docs/examples/xarray_engine_temporal.ipynb +++ b/docs/examples/xarray_engine_temporal.ipynb @@ -54,7 +54,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8dca45f059a04a48898e26443d3b1a64", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00`)." ] }, { @@ -114,7 +138,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"raw\"`` the \"date\", \"time\" and \"step\" ecCodes GRIB keys are used to form the temporal dimensions." + "When ``time_dim_mode=\"raw\"`` the \"date\", \"time\" and \"step\" roles are used to form the temporal dimensions." ] }, { @@ -162,14 +186,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -224,6 +248,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -260,7 +285,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -271,7 +296,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", 
- " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -343,15 +368,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -502,20 +527,18 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:    (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n",
    -       "                longitude: 36)\n",
    +       "Dimensions:    (date: 2, time: 2, step: 2, level: 2, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * date       (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n",
            "  * time       (time) timedelta64[ns] 16B 00:00:00 12:00:00\n",
            "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist   (levelist) int64 16B 500 700\n",
    +       "  * level      (level) int64 16B 500 700\n",
            "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r          (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t          (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r          (date, time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t          (date, time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -524,34 +547,32 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n", - " longitude: 36)\n", + "Dimensions: (date: 2, time: 2, step: 2, level: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (date, time, step, level, latitude, longitude) float64 88kB ...\n", + " t (date, time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -599,7 +620,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"forecast\"`` the \"date\" and \"time\" ecCodes GRIB keys are merged to form the dimension \"forecats_reference_time\". It also adds the \"step\" dimension based on the \"step\" key." + "When ``time_dim_mode=\"forecast\"`` the \"date\" and \"time\" roles are merged to form the dimension \"forecast_reference_time\". It also adds the \"step\" dimension." 
] }, { @@ -647,14 +668,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -709,6 +730,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -745,7 +767,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -756,7 +778,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -828,15 +850,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -987,62 +1009,64 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
            "    type:         fc\n",
            "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", @@ -1085,7 +1109,7 @@ "tags": [] }, "source": [ - "When ``time_dim_mode=\"valid_time\"`` the only temporal dimension is \"valid_time\". It is built from the values of the \"validityDate\" and \"validityTime\" ecCodes GRIB keys. This dimension can only be generated if each GRIB field has a distinct valid time, so it typically fits for analysis/climate data." + "When ``time_dim_mode=\"valid_time\"`` the only temporal dimension is \"valid_time\". By default, it is built from the values of the \"validityDate\" and \"validityTime\" ecCodes GRIB keys. This dimension can only be generated if each GRIB field has a distinct valid time, so it typically fits for analysis/climate data." 
] }, { @@ -1103,7 +1127,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "98fb8660778641739b5bdba816ad80ef", "version_major": 2, "version_minor": 0 }, @@ -1147,14 +1171,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -1209,6 +1233,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1245,7 +1270,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1256,7 +1281,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1328,15 +1353,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1494,27 +1519,30 @@ " * longitude (longitude) float64 96B -70.0 -60.0 -50.0 ... 
20.0 30.0 40.0\n", "Data variables:\n", " msl (valid_time, latitude, longitude) float64 5kB ...\n", - "Attributes:\n", + "Attributes: (12/13)\n", " param: msl\n", + " paramId: 151\n", " class: od\n", " stream: oper\n", " levtype: sfc\n", " type: an\n", - " expver: 0001\n", + " ... ...\n", + " date: 20160925\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • param :
    msl
    paramId :
    151
    class :
    od
    stream :
    oper
    levtype :
    sfc
    type :
    an
    expver :
    0001
    date :
    20160925
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 6kB\n", @@ -1525,13 +1553,16 @@ " * longitude (longitude) float64 96B -70.0 -60.0 -50.0 ... 20.0 30.0 40.0\n", "Data variables:\n", " msl (valid_time, latitude, longitude) float64 5kB ...\n", - "Attributes:\n", + "Attributes: (12/13)\n", " param: msl\n", + " paramId: 151\n", " class: od\n", " stream: oper\n", " levtype: sfc\n", " type: an\n", - " expver: 0001\n", + " ... ...\n", + " date: 20160925\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", @@ -1560,7 +1591,7 @@ "tags": [] }, "source": [ - "This mode can also be used for suitable forecasts. To use it for the original forecast data first we need to filter it." + "This mode can also be used for suitable forecasts data. To use it for the original forecast data first we need to filter it." ] }, { @@ -1608,14 +1639,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -1670,6 +1701,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1706,7 +1738,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1717,7 +1749,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: 
\"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1789,15 +1821,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1948,17 +1980,16 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 44kB\n",
    -       "Dimensions:     (valid_time: 2, levelist: 2, latitude: 19, longitude: 36)\n",
    +       "Dimensions:     (valid_time: 2, level: 2, latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * valid_time  (valid_time) datetime64[ns] 16B 2024-06-03 2024-06-03T06:00:00\n",
    -       "  * levelist    (levelist) int64 16B 500 700\n",
    +       "  * level       (level) int64 16B 500 700\n",
            "  * latitude    (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude   (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r           (valid_time, levelist, latitude, longitude) float64 22kB ...\n",
    -       "    t           (valid_time, levelist, latitude, longitude) float64 22kB ...\n",
    +       "    r           (valid_time, level, latitude, longitude) float64 22kB ...\n",
    +       "    t           (valid_time, level, latitude, longitude) float64 22kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -1969,31 +2000,30 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 44kB\n", - "Dimensions: (valid_time: 2, levelist: 2, latitude: 19, longitude: 36)\n", + "Dimensions: (valid_time: 2, level: 2, latitude: 19, longitude: 36)\n", "Coordinates:\n", " * valid_time (valid_time) datetime64[ns] 16B 2024-06-03 2024-06-03T06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (valid_time, levelist, latitude, longitude) float64 22kB ...\n", - " t (valid_time, levelist, latitude, longitude) float64 22kB ...\n", + " r (valid_time, level, latitude, longitude) float64 22kB ...\n", + " t (valid_time, level, latitude, longitude) float64 22kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -2042,7 +2072,7 @@ "tags": [] }, "source": [ - "When ``add_valid_time_dim=True`` it adds coord `valid_time` containing the valid times for all the different temporal dimensions as datetime64. When ``time_dim_mode=\"valid_time\"`` this coordinate is always added irrespectively of the value of ``add_valid_time_dim``." + "When ``add_valid_time_dim=True`` it adds the coordine`valid_time` containing the valid times for all the different temporal dimensions as datetime64. When ``time_dim_mode=\"valid_time\"`` this coordinate is always added irrespective of the value of ``add_valid_time_dim``." 
] }, { @@ -2090,14 +2120,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -2152,6 +2182,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -2188,7 +2219,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -2199,7 +2230,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -2271,15 +2302,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2430,21 +2461,20 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:     (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n",
    +       "Dimensions:     (date: 2, time: 2, step: 2, level: 2, latitude: 19,\n",
            "                 longitude: 36)\n",
            "Coordinates:\n",
            "  * date        (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n",
            "  * time        (time) timedelta64[ns] 16B 00:00:00 12:00:00\n",
            "  * step        (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist    (levelist) int64 16B 500 700\n",
    +       "  * level       (level) int64 16B 500 700\n",
            "    valid_time  (date, time, step) datetime64[ns] 64B ...\n",
            "  * latitude    (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n",
            "  * longitude   (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n",
            "Data variables:\n",
    -       "    r           (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t           (date, time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r           (date, time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t           (date, time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
    @@ -2453,35 +2483,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (date: 2, time: 2, step: 2, levelist: 2, latitude: 19,\n", + "Dimensions: (date: 2, time: 2, step: 2, level: 2, latitude: 19,\n", " longitude: 36)\n", "Coordinates:\n", " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " valid_time (date, time, step) datetime64[ns] 64B ...\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 ... 330.0 340.0 350.0\n", "Data variables:\n", - " r (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (date, time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (date, time, step, level, latitude, longitude) float64 88kB ...\n", + " t (date, time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", @@ -2524,13 +2553,13 @@ "source": [ "When ``decode_times=True`` (the default) the follwing coordinates will be stored as datetime64:\n", "\n", - "- coordinates representing the date-like ecCodes keys (e.g. \"date\", \"validityDate\" etc.)\n", + "- coordinates representing the date-like roles or GRIB keys (e.g. \"date\", \"validityDate\" etc.)\n", "- datetime coordinates (e.g. \"forecast_reference_time\" etc.)\n", "\n", "When ``decode_timedelta=True`` (the default) the following coordinates will be stored as timedelta64:\n", "\n", - "- coordinates representing the time-like ecCodes keys (e.g. \"time\", \"validityTime\" etc.)\n", - "- duration-like coordinates (e.g. \"step\")" + "- coordinates representing the time-like roles or GRIB keys (e.g. \"time\", \"validityTime\" etc.)\n", + "- duration-like coordinates (e.g. 
\"step\", \"endStep\")" ] }, { @@ -2546,7 +2575,7 @@ " * date (date) datetime64[ns] 16B 2024-06-03 2024-06-04\n", " * time (time) timedelta64[ns] 16B 00:00:00 12:00:00\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0" ] @@ -2568,12 +2597,12 @@ "source": [ "When ``decode_times=False`` the following rules apply:\n", "\n", - "- coordinates representing date-like ecCodes keys (e.g. \"date\", \"validityDate\" etc.) will store the native GRIB int values (as yyyymmdd)\n", + "- coordinates representing date-like GRIB keys (e.g. \"date\", \"validityDate\" etc.) will store the native GRIB int values (as yyyymmdd)\n", "- datetime coordinates (e.g. \"forecast_reference_time\" etc.) will store datetime64 values\n", "\n", "When ``decode_timedelta=False`` the following rules apply:\n", "\n", - "- coordinates representing the time-like ecCodes keys (e.g. \"time\", \"validityTime\" etc.) will store the native GRIB int values (as 100*hours + minutes)\n", + "- coordinates representing the time-like GRIB keys (e.g. \"time\", \"validityTime\" etc.) will store the native GRIB int values (as 100*hours + minutes)\n", "- duration-like (e.g. \"step\") coordinates will store int values with units indicated by the coordinate attribute \"units\"" ] }, @@ -2596,7 +2625,7 @@ " * date (date) int64 16B 20240603 20240604\n", " * time (time) int64 16B 0 1200\n", " * step (step) int64 16B 0 6\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 
330.0 340.0 350.0" ] @@ -2626,9 +2655,7 @@ { "data": { "text/plain": [ - "{'standard_name': 'forecast_period',\n", - " 'long_name': 'time since forecast_reference_time',\n", - " 'units': 'hours'}" + "{'units': 'hours'}" ] }, "execution_count": 9, diff --git a/docs/examples/xarray_engine_to_grib.ipynb b/docs/examples/xarray_engine_to_grib.ipynb index a69e0264e..8f0efe4fd 100644 --- a/docs/examples/xarray_engine_to_grib.ipynb +++ b/docs/examples/xarray_engine_to_grib.ipynb @@ -58,7 +58,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "cf6d8ab8661b476ca0f076c67c8acfea", "version_major": 2, "version_minor": 0 }, @@ -69,13 +69,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From version 0.11.0 the default engine for to_xarray is 'earthkit'. Use engine=`cfgrib` to invoke the cfgrib engine.\n" - ] - }, { "data": { "text/html": [ @@ -109,14 +102,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -171,6 +164,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -207,7 +201,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ 
-218,7 +212,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -290,15 +284,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -449,17 +443,17 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
            "    class:        od\n",
            "    stream:       oper\n",
    @@ -471,34 +465,34 @@
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", @@ -933,7 +927,7 @@ "tags": [] }, "source": [ - "The generated GRIB fieldlist can be saved to disk using the :py:meth:`~data.readers.grib.index.GribFieldList.save` method." + "The generated GRIB fieldlist can be saved to disk using the :func:`to_target` method." 
] }, { @@ -1035,7 +1029,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "120cbbf74d0c4edeac0cbdeed7bb6e2f", "version_major": 2, "version_minor": 0 }, @@ -1197,9 +1191,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -1211,7 +1205,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/examples/xarray_engine_variable_key.ipynb b/docs/examples/xarray_engine_variable_key.ipynb index 23b95acc3..71c30dc70 100644 --- a/docs/examples/xarray_engine_variable_key.ipynb +++ b/docs/examples/xarray_engine_variable_key.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "08b75c56-0b2f-4cc6-9637-b28ad2aa4455", "metadata": { "editable": true, @@ -41,6 +41,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "41fcae342d1e49ac812d5756cd625b1f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00 span {\n", @@ -255,15 +270,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -414,69 +429,71 @@ " fill: currentColor;\n", "}\n", "
    <xarray.Dataset> Size: 176kB\n",
    -       "Dimensions:                  (forecast_reference_time: 4, step: 2, levelist: 2,\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
            "                              latitude: 19, longitude: 36)\n",
            "Coordinates:\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
            "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    -       "  * levelist                 (levelist) int64 16B 500 700\n",
    +       "  * level                    (level) int64 16B 500 700\n",
            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
            "Data variables:\n",
    -       "    r                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    -       "    t                        (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
            "Attributes:\n",
    -       "    param:        t\n",
            "    class:        od\n",
            "    stream:       oper\n",
            "    levtype:      pl\n",
            "    type:         fc\n",
            "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
            "    domain:       g\n",
            "    number:       0\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", - "Dimensions: (forecast_reference_time: 4, step: 2, levelist: 2,\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", " latitude: 19, longitude: 36)\n", "Coordinates:\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", - " * levelist (levelist) int64 16B 500 700\n", + " * level (level) int64 16B 500 700\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", "Data variables:\n", - " r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", - " t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ...\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 4, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -518,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "4898ceb3-4657-4397-b1d8-3bc1110b86eb", "metadata": { "editable": true, @@ -561,14 +578,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "html[data-theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " 
--xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -623,6 +640,7 @@ ".xr-section-item input {\n", " display: inline-block;\n", " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -659,7 +677,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: 'â–º';\n", + " content: \"â–º\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -670,7 +688,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: 'â–¼';\n", + " content: \"â–¼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -742,15 +760,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -914,30 +932,30 @@ " t500 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " t700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 700\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    levtype :
    pl
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 176kB\n", @@ -954,20 +972,20 @@ " t500 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " t700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: t\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 700\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -993,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "f55fe598-7f80-4e9a-87b3-43131f78f2e1", "metadata": { "editable": true, @@ -1003,6 +1021,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3aaa4f09dc6e498c8df353d0bce3579e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_sfc.grib: 0%| | 0.00/390k [00:00 span {\n", @@ -1217,15 +1250,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1398,30 +1431,29 @@ " z700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z850 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", @@ -1447,20 +1479,19 @@ " z700 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z850 (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 8, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -1501,7 +1532,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "e1bc4e80-2efc-460c-96b0-26553bed7591", "metadata": { "editable": true, @@ -1511,6 +1542,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b3618ce82f674f7d9c28e3bc7cc87fa9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_sfc.grib: 0%| | 0.00/390k [00:00 span {\n", @@ -1725,15 +1771,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1906,30 +1952,29 @@ " z_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z_850_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", @@ -1955,20 +2000,19 @@ " z_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " z_850_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: z\n", " class: od\n", " stream: oper\n", - " levtype: pl\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", " number: 0\n", - " levelist: 850\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 10, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1990,12 +2034,12 @@ "tags": [] }, "source": [ - "This technique is partuculary useful when the same parameter is available on multiple level types in the input data. In this case using \"param_level\" does not result in a full hypercube, however the same `remapping`` that we used above does." + "This technique is particularly useful when the same parameter is available on multiple level types in the input data. In this case using \"param_level\" does not result in a full hypercube; however, the same ``remapping`` that we used above does." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "id": "39cbb360-43c4-416e-956b-8b6cfadda26c", "metadata": { "editable": true, @@ -2005,6 +2049,20 @@ "tags": [] }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ad9990991ee44e4b81fad30f4cef90e4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "mixed_pl_ml.grib: 0%| | 0.00/176k [00:00 span {\n", @@ -2219,15 +2278,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2395,29 +2454,28 @@ " u_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " u_90_ml (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: u\n", " class: od\n", " stream: oper\n", - " levtype: ml\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", - " levelist: 90\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    od
    stream :
    oper
    type :
    fc
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 351kB\n", @@ -2438,19 +2496,18 @@ " u_700_pl (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", " u_90_ml (forecast_reference_time, step, latitude, longitude) float64 44kB ...\n", "Attributes:\n", - " param: u\n", " class: od\n", " stream: oper\n", - " levtype: ml\n", " type: fc\n", " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", " domain: g\n", - " levelist: 90\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, - "execution_count": 11, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -2478,9 +2535,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev_ecc", + "display_name": "dev", "language": "python", - "name": "dev_ecc" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -2492,7 +2549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/docs/release_notes/version_0.15_updates.rst b/docs/release_notes/version_0.15_updates.rst index bc8a5d815..8fe6c0ef8 100644 --- a/docs/release_notes/version_0.15_updates.rst +++ b/docs/release_notes/version_0.15_updates.rst @@ -5,9 +5,36 @@ Version 0.15 Updates Version 0.15.0 =============== +Deprecations ++++++++++++++++++++ + +- :ref:`deprecated-ens-role` + Xarray engine ++++++++++++++++++++++++++++++ +Breaking changes +------------------- + +- Separated the dimension names from metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the dimension role, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. "levtype" in the :ref:`default ` profile and "typeOfLevel" in the :ref:`grib ` profile. 
The old behaviour can still be invoked by using the newly added ``keep_dim_role_names=False`` option. +- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datetime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that, due to this change, when ``keep_dim_role_names=False`` is used the step dimension will be called "step_timedelta" instead of "step". + + +Other changes +------------------- + +- Allowed using mappings in the ``ensure_dims``, ``extra_dims`` and ``fixed_dims`` options to define both the name of the dimensions and the metadata keys to generate their values. Previously, these options only took one or more metadata keys. E.g. both of the options below will generate the "expver", "mars_stream" and "mars_class" dimensions using the "expver", "stream" and "class" metadata keys. + + .. code-block:: python + + extra_dims = ["expver", {"mars_stream": "stream"}, ("mars_class", "class")] + extra_dims = { + "expver": "expver", + "mars_stream": "stream", + "mars_class": "class", + } + + - Improved the serialisation of GRIB fieldlists to reduce memory usage when Xarray is generated with chunks (:pr:`700`). See the :ref:`/examples/xarray_engine_chunks.ipynb` notebook example. - TensorBackendArray, which implements the lazy loading of DataArrays in the Xarray engine, now uses a ``dask.utils.SerializableLock`` when accessing the data (:pr:`700`). - Enabled converting :ref:`data-sources-lod` fieldlists into Xarray (:pr:`701`). See the :ref:`/examples/list_of_dicts_to_xarray.ipynb` notebook example. 
diff --git a/src/earthkit/data/indexing/tensor.py b/src/earthkit/data/indexing/tensor.py index e706e84c7..4e6460eb5 100644 --- a/src/earthkit/data/indexing/tensor.py +++ b/src/earthkit/data/indexing/tensor.py @@ -443,28 +443,17 @@ def _subset(self, indexes): ds = self.source[tuple(dataset_indexes)] return self.from_tensor(self, ds, coords) - def make_valid_datetime(self, dims, dtype="datetime64[ns]"): - + def make_valid_datetime(self, dims_map, dtype="datetime64[ns]"): # TODO: make it more general - dims_opt = [ - ["base_datetime", "step"], - ["base_datetime"], - ["forecast_reference_time", "step"], - ["forecast_reference_time"], - ["date", "time", "step"], - ["date", "time"], - ["date", "step"], - ["time", "step"], - ["step"], - ] - # in the tensor the dims.coords are GRIB keys for k in ["valid_datetime", "valid_time"]: if k in self.user_coords: import datetime return (k,), [datetime.datetime.fromisoformat(x) for x in self.user_coords[k]] + # in the tensor the dims.coords are GRIB keys + # dims_map is a mapping from dim names to GRIB keys DIM_ROLES = { "forecast_reference_time": ("forecast_reference_time", "base_datetime"), "step": ("step_timedelta", "step", "ensStep", "stepRange"), @@ -472,9 +461,10 @@ def make_valid_datetime(self, dims, dtype="datetime64[ns]"): "time": ("time", "dataTime"), } + # map dim roles to keys available in the tensor keys = {} for k in DIM_ROLES: - for d in dims: + for d in dims_map: if d.name == k: keys[k] = d.key break @@ -484,7 +474,7 @@ def make_valid_datetime(self, dims, dtype="datetime64[ns]"): keys[k] = d break - dims_opt = [ + DIM_COMBINATIONS = [ ["forecast_reference_time", "step"], ["forecast_reference_time"], ["date", "time", "step"], @@ -494,59 +484,14 @@ def make_valid_datetime(self, dims, dtype="datetime64[ns]"): ["step"], ] - print(f"{keys=}") - for dims in dims_opt: + for dims in DIM_COMBINATIONS: if all(d in keys for d in dims): - print("Found dims:", dims) + dims_step = [keys[d] for d in dims] # use same dim order as in 
user_dims - dims = [keys[d] for d in dims] - dims = [d for d in dims if d in self.user_dims] + dims = [d for d in self.user_dims if d in dims_step] + assert len(dims) == len(dims_step), f"Duplicate dims in {dims}" other_dims = [d for d in self.user_dims if d not in dims] - print(f"{dims=} {other_dims=}") - if other_dims: - import datetime - - import numpy as np - - other_coords = { - k: next(iter(self.user_coords[k])) for k in other_dims if k in self.user_coords - } - vals = np.array( - [ - datetime.datetime.fromisoformat(x) - for x in self.source.sel(**other_coords).metadata("valid_datetime") - ], - dtype=dtype, - ) - - shape = tuple([self.user_dims[d] for d in dims]) - return tuple(dims), vals.reshape(shape) - else: - import datetime - - import numpy as np - - vals = np.array( - [datetime.datetime.fromisoformat(x) for x in self.source.metadata("valid_datetime")], - dtype=dtype, - ) - - shape = tuple([self.user_dims[d] for d in dims]) - return tuple(dims), vals.reshape(shape) - return None, None - - # print(f"{keys=}") - - # print(f"{keys=}") - - # print(f"{self.user_dims=}") - for dims in dims_opt: - if all(d in self.user_dims for d in dims): - # use same dim order as in user_dims - dims = [d for d in dims if d in self.user_dims] - other_dims = [d for d in self.user_dims if d not in dims] - # print(f"{dims=} {other_dims=}") if other_dims: import datetime diff --git a/src/earthkit/data/utils/xarray/dim.py b/src/earthkit/data/utils/xarray/dim.py index 1ff48a778..36951acce 100644 --- a/src/earthkit/data/utils/xarray/dim.py +++ b/src/earthkit/data/utils/xarray/dim.py @@ -99,7 +99,7 @@ def make_dim(owner, *args, name=None, key=None, **kwargs): if ck is not None: d = CompoundKeyDim(owner, ck) else: - print("args", args, "kwargs", kwargs, "name", name, "key", key) + # print("args", args, "kwargs", kwargs, "name", name, "key", key) d = OtherDim(owner, *args, name=name, key=key, **kwargs) return d @@ -667,7 +667,7 @@ def __init__( self.dims = dims - 
print(f"self.dims={self.dims}") + # LOG.debug(f"self.dims={self.dims}") # for d in self.dims.values(): # if d.name != d.key: diff --git a/src/earthkit/data/utils/xarray/profile.py b/src/earthkit/data/utils/xarray/profile.py index d160fad51..41a2e078c 100644 --- a/src/earthkit/data/utils/xarray/profile.py +++ b/src/earthkit/data/utils/xarray/profile.py @@ -332,9 +332,6 @@ def update(self, ds): assert self.variables assert self.variable_key not in self.dim_keys - print("UPDATE dims", self.dims.dims) - print("UPDATE dim_keys", self.dim_keys) - # print("UPDATE variable_key", self.variable_key) # print("UPDATE variables", self.variables) # print(" -> dim_keys", self.dim_keys) diff --git a/tests/xr_engine/test_xr_dims.py b/tests/xr_engine/test_xr_dims.py index a43c4a51d..922cfa9d7 100644 --- a/tests/xr_engine/test_xr_dims.py +++ b/tests/xr_engine/test_xr_dims.py @@ -245,6 +245,24 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): {"time_dim_mode": "raw", "ensure_dims": ["class", "step"], "keep_dim_role_names": False}, "param", ["r", "t"], + { + "class": ["od"], + "step": [0], + "date": ["20210101", "20210102"], + "time": ["12"], + "step_timedelta": [datetime.timedelta(hours=0)], + "levelist": [850, 1000], + "levtype": ["pl"], + }, + ), + ( + { + "time_dim_mode": "raw", + "ensure_dims": ["class", "step_timedelta"], + "keep_dim_role_names": False, + }, + "param", + ["r", "t"], { "class": ["od"], "date": ["20210101", "20210102"], diff --git a/tests/xr_engine/test_xr_remapping.py b/tests/xr_engine/test_xr_remapping.py index 859c51c6c..b491071ec 100644 --- a/tests/xr_engine/test_xr_remapping.py +++ b/tests/xr_engine/test_xr_remapping.py @@ -38,18 +38,52 @@ def test_xr_remapping_1(): @pytest.mark.cache -def test_xr_remapping_2(): +@pytest.mark.parametrize( + "kwargs,coords,dims", + [ + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + keep_dim_role_names=False, + ), + {"_k": ["500_pl", "700_pl"]}, + 
{"forecast_reference_time": 4, "step_timedelta": 2, "_k": 2, "latitude": 19, "longitude": 36}, + ), + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + keep_dim_role_names=True, + ), + {"level": ["500_pl", "700_pl"]}, + {"forecast_reference_time": 4, "step": 2, "level": 2, "latitude": 19, "longitude": 36}, + ), + ( + dict( + dim_roles={"level": "_k"}, + level_dim_mode="level", + remapping={"_k": "{levelist}_{levtype}"}, + rename_dims={"level": "_k"}, + keep_dim_role_names=True, + ), + {"_k": ["500_pl", "700_pl"]}, + {"forecast_reference_time": 4, "step": 2, "_k": 2, "latitude": 19, "longitude": 36}, + ), + ], +) +def test_xr_remapping_2(kwargs, coords, dims): ds0 = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/level/pl_small.grib")) - ds = ds0.to_xarray( - dim_roles={"level": "_k"}, level_dim_mode="level", remapping={"_k": "{levelist}_{levtype}"} - ) + ds = ds0.to_xarray(**kwargs) data_vars = ["r", "t"] assert [v for v in ds.data_vars] == data_vars - coords = {"_k": ["500_pl", "700_pl"]} + # coords = {"_k": ["500_pl", "700_pl"]} compare_coords(ds, coords) - dims = {"forecast_reference_time": 4, "step": 2, "_k": 2, "latitude": 19, "longitude": 36} + # dims = {"forecast_reference_time": 4, "step_timedelta": 2, "_k": 2, "latitude": 19, "longitude": 36} compare_dims(ds, dims, sizes=True) diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index abd42a4c0..db8c2d4e9 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -22,7 +22,7 @@ [ ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"]}, + {"time_dim_mode": "raw", "split_dims": ["step"], "keep_dim_role_names": False}, 2, ["2t", "msl", "r", "t"], ["date", "time", "levelist"], @@ -30,7 +30,12 @@ ), ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step"}, + { + "time_dim_mode": "raw", + "split_dims": ["step"], + 
"ensure_dims": "step", + "keep_dim_role_names": False, + }, 2, ["2t", "msl", "r", "t"], ["date", "time", "step", "levelist"], @@ -38,7 +43,11 @@ ), ( ["cds-reanalysis-era5-single-levels-20230101-low-resol.grib"], - {"time_dim_mode": "valid_time", "split_dims": ["stream", "dataType", "edition", "Ni"]}, + { + "time_dim_mode": "valid_time", + "split_dims": ["stream", "dataType", "edition", "Ni"], + "keep_dim_role_names": False, + }, 11, None, ["valid_time"], @@ -56,6 +65,27 @@ {"stream": "wave", "dataType": "an", "edition": 1, "Ni": 18}, ], ), + ( + ["level", "pl.grib"], + {"time_dim_mode": "raw", "split_dims": ["step"], "keep_dim_role_names": True}, + 2, + ["2t", "msl", "r", "t"], + ["date", "time", "level"], + [{"step": 0}, {"step": 6}], + ), + ( + ["level", "pl.grib"], + { + "time_dim_mode": "raw", + "split_dims": ["step"], + "ensure_dims": "step", + "keep_dim_role_names": True, + }, + 2, + ["2t", "msl", "r", "t"], + ["date", "time", "step", "level"], + [{"step": 0}, {"step": 6}], + ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist"]), # ({"squeeze": False}, "param", ["r", "t"], ["time", "step", "levelist"]), ], diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index 26b968f14..9560d720e 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -456,6 +456,29 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): ], }, ("step", "hours"), + { + "valid_time": [ + [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T12", "ns")], + [np.datetime64("2024-06-03T06", "ns"), np.datetime64("2024-06-03T18", "ns")], + ] + }, + ), + ( + { + "time_dim_mode": "forecast", + "add_valid_time_coord": True, + "decode_times": False, + "decode_timedelta": False, + "keep_dim_role_names": False, + }, + { + "forecast_reference_time": [ + np.datetime64("2024-06-03T00", "ns"), + np.datetime64("2024-06-03T12", "ns"), + ], + "step_timedelta": [0, 6], + }, + ("step_timedelta", "hours"), { 
"valid_time": [ [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], @@ -472,16 +495,14 @@ def test_xr_valid_time_coord(kwargs, dims, step_units, coords): ds = ds_ek.to_xarray(**kwargs) - print(ds) - compare_dims(ds, dims, order_ref_var="t") vt = ds.coords["valid_time"] - assert vt.dims == ("forecast_reference_time", "step") - - # ref = [ - # [np.datetime64("2024-06-03T00", "ns"), np.datetime64("2024-06-03T06", "ns")], - # [np.datetime64("2024-06-03T12", "ns"), np.datetime64("2024-06-03T18", "ns")], - # ] + assert vt.dims == tuple(dims.keys()) compare_coords(ds, coords) + + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" diff --git a/tests/xr_engine/test_xr_write.py b/tests/xr_engine/test_xr_write.py index 118bb8c5e..687e039f2 100644 --- a/tests/xr_engine/test_xr_write.py +++ b/tests/xr_engine/test_xr_write.py @@ -211,6 +211,7 @@ def test_xr_write_seasonal(): ds = ds_ek.to_xarray( time_dim_mode="forecast", dim_roles={"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, + keep_dim_role_names=False, ) import xarray as xr From e7b2d415e980c352805b0fb664811c5421a73972 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 9 Jun 2025 18:34:15 +0100 Subject: [PATCH 4/9] Handle time range --- docs/examples/index.rst | 2 + docs/examples/xarray_engine_ensemble.ipynb | 1162 ++++++++++++++ docs/examples/xarray_engine_squeeze.ipynb | 1497 +++++++++++++++++++ docs/guide/xarray/dim.rst | 110 ++ docs/guide/xarray/overview.rst | 11 +- docs/release_notes/version_0.15_updates.rst | 25 +- src/earthkit/data/core/index.py | 1 + src/earthkit/data/utils/xarray/fieldlist.py | 4 +- tests/xr_engine/test_xr_time.py | 70 + 9 files changed, 2876 insertions(+), 6 deletions(-) create mode 100644 docs/examples/xarray_engine_ensemble.ipynb create mode 100644 docs/examples/xarray_engine_squeeze.ipynb create mode 100644 
docs/guide/xarray/dim.rst diff --git a/docs/examples/index.rst b/docs/examples/index.rst index f9f5af4a9..68875b017 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -159,11 +159,13 @@ Xarray engine xarray_engine_overview.ipynb xarray_engine_temporal.ipynb xarray_engine_level.ipynb + xarray_engine_ensemble.ipynb xarray_engine_variable_key.ipynb xarray_engine_field_dims.ipynb xarray_engine_to_grib.ipynb xarray_engine_split.ipynb xarray_engine_seasonal.ipynb + xarray_engine_squeeze.ipynb xarray_engine_chunks.ipynb Targets and encoders diff --git a/docs/examples/xarray_engine_ensemble.ipynb b/docs/examples/xarray_engine_ensemble.ipynb new file mode 100644 index 000000000..d75d1726a --- /dev/null +++ b/docs/examples/xarray_engine_ensemble.ipynb @@ -0,0 +1,1162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9ea9a922-c03f-43c1-aa83-b4ce321b75f5", + "metadata": {}, + "source": [ + "## Xarray engine: ensemble data" + ] + }, + { + "cell_type": "markdown", + "id": "ef0e9584-da7b-4461-804e-5785e494485e", + "metadata": {}, + "source": [ + "First, we get some ensemble forecast data and read it into a GRIB fieldlist." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ecc9eac2-21fa-47a1-b1a4-37a26c980800", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "54e1f557c9b0480ba48f170084bede29", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "ens_cf_pf.grib: 0%| | 0.00/7.03k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
    0ecmftisobaricInhPa5002024060300cf0regular_ll
    1ecmftisobaricInhPa5002024060306cf0regular_ll
    2ecmftisobaricInhPa5002024060300pf1regular_ll
    3ecmftisobaricInhPa5002024060300pf2regular_ll
    4ecmftisobaricInhPa5002024060306pf1regular_ll
    5ecmftisobaricInhPa5002024060306pf2regular_ll
    \n", + "" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", + "0 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "1 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "2 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "3 ecmf t isobaricInhPa 500 20240603 0 0 \n", + "4 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "5 ecmf t isobaricInhPa 500 20240603 0 6 \n", + "\n", + " dataType number gridType \n", + "0 cf 0 regular_ll \n", + "1 cf 0 regular_ll \n", + "2 pf 1 regular_ll \n", + "3 pf 2 regular_ll \n", + "4 pf 1 regular_ll \n", + "5 pf 2 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.ls()" + ] + }, + { + "cell_type": "raw", + "id": "48c853ab-abec-4475-818f-f64d8d3d01d8", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the ensemble dimension is defined by the \"number\" :ref:`dimension role `. By default, this role uses the \"number\" metadata key." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7ae667a2-8e6b-4c6d-9cfe-b6bde40b2971", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 33kB\n",
    +       "Dimensions:    (number: 3, step: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number     (number) int64 24B 0 1 2\n",
    +       "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
    +       "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    t          (number, step, latitude, longitude) float64 33kB ...\n",
    +       "Attributes: (12/13)\n",
    +       "    param:        t\n",
    +       "    paramId:      130\n",
    +       "    class:        od\n",
    +       "    stream:       enfo\n",
    +       "    levtype:      pl\n",
    +       "    type:         cf\n",
    +       "    ...           ...\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    levelist:     500\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 33kB\n", + "Dimensions: (number: 3, step: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", + "Data variables:\n", + " t (number, step, latitude, longitude) float64 33kB ...\n", + "Attributes: (12/13)\n", + " param: t\n", + " paramId: 130\n", + " class: od\n", + " stream: enfo\n", + " levtype: pl\n", + " type: cf\n", + " ... ...\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " levelist: 500\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "6f2a0d29-10bf-45ad-abf5-e457fd6f820d", + "metadata": { + "editable": true, + "raw_mimetype": "text/x-rst", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This default can be overridden by specifying custom ``dim_roles``. E.g. to get the ensemble member number from the \"perturbationNumber\" key we can use:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ba4a3746-3549-494b-8272-df0b03d6936c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 33kB\n",
    +       "Dimensions:    (number: 3, step: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number     (number) int64 24B 0 1 2\n",
    +       "  * step       (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * latitude   (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n",
    +       "  * longitude  (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    t          (number, step, latitude, longitude) float64 33kB ...\n",
    +       "Attributes: (12/14)\n",
    +       "    param:        t\n",
    +       "    paramId:      130\n",
    +       "    class:        od\n",
    +       "    stream:       enfo\n",
    +       "    levtype:      pl\n",
    +       "    type:         cf\n",
    +       "    ...           ...\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    number:       0\n",
    +       "    levelist:     500\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 33kB\n", + "Dimensions: (number: 3, step: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * latitude (latitude) float64 152B 90.0 80.0 70.0 60.0 ... -70.0 -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 20.0 30.0 ... 330.0 340.0 350.0\n", + "Data variables:\n", + " t (number, step, latitude, longitude) float64 33kB ...\n", + "Attributes: (12/14)\n", + " param: t\n", + " paramId: 130\n", + " class: od\n", + " stream: enfo\n", + " levtype: pl\n", + " type: cf\n", + " ... ...\n", + " time: 0\n", + " domain: g\n", + " number: 0\n", + " levelist: 500\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray(dim_roles={\"number\": \"perturbationNumber\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "596c4426-32d7-4766-a640-4436da604918", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/xarray_engine_squeeze.ipynb b/docs/examples/xarray_engine_squeeze.ipynb new file mode 100644 index 000000000..c32360896 --- /dev/null +++ b/docs/examples/xarray_engine_squeeze.ipynb @@ -0,0 +1,1497 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2feafcc-430b-4718-983f-554e55dcd54a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: sqeezing 
dimensions" + ] + }, + { + "cell_type": "markdown", + "id": "f1b37637-7cce-4af5-8bad-1ddb6492d732", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "First, we get some GRIB forecast data on pressure levels and read it into a GRIB fieldlist." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fbbb4422431d4d75aad6e3a4bd7d20d4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
    +       "                              latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    levtype:      pl\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    number:       0\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " class: od\n", + " stream: oper\n", + " levtype: pl\n", + " type: fc\n", + " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " number: 0\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "5e22a7d6-8f86-454a-b7e7-fca4273cb493", + "metadata": {}, + "source": [ + "When using ``squeeze=True`` these dimension are added to the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e19e154e-89ac-4d5c-a82c-bd6227bc94f6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
    +       "                              level: 2, level_type: 1, latitude: 19,\n",
    +       "                              longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * level_type               (level_type) <U2 8B 'pl'\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (number, forecast_reference_time, step, level, level_type, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, level, level_type, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", + " level: 2, level_type: 1, latitude: 19,\n", + " longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * level_type (level_type) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 176kB\n",
    +       "Dimensions:                  (number: 1, forecast_reference_time: 4, step: 2,\n",
    +       "                              level: 2, latitude: 19, longitude: 36)\n",
    +       "Coordinates:\n",
    +       "  * number                   (number) int64 8B 0\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
    +       "  * level                    (level) int64 16B 500 700\n",
    +       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +       "Data variables:\n",
    +       "    r                        (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "    t                        (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
    +       "Attributes:\n",
    +       "    class:        od\n",
    +       "    stream:       oper\n",
    +       "    levtype:      pl\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20240603\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (number: 1, forecast_reference_time: 4, step: 2,\n", + " level: 2, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " r (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (number, forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " class: od\n", + " stream: oper\n", + " levtype: pl\n", + " type: fc\n", + " expver: 0001\n", + " date: 20240603\n", + " time: 0\n", + " domain: g\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl.to_xarray(ensure_dims=[\"number\", \"level_type\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6937b30-029a-4b2a-8391-ed4f5ac8eeae", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/guide/xarray/dim.rst b/docs/guide/xarray/dim.rst new file mode 100644 index 000000000..2e1c39904 --- /dev/null +++ b/docs/guide/xarray/dim.rst @@ -0,0 +1,110 @@ +.. 
_xr_dim: + +Dimensions +================== + +One of the most important aspects of the :ref:`xr_engine` is how it generates dimensions in the Xarray dataset. + +.. _xr_dim_roles: +.. _xr_predefined_dims: + + +Predefined dimensions and dimension roles +------------------------------------------- + +By default, a list of predefined dimensions is generated. Their order is fixed: + +- ensemble forecast member dimension +- temporal dimensions (controlled by ``time_dim_mode``) +- vertical dimensions (controlled by ``level_dim_mode``) + +The predefined dimensions are based on the ``dim_roles``, which is a mapping between the "roles" and the metadata keys associated with the roles. +The possible roles are as follows: + +.. list-table:: Default dimension roles + :header-rows: 1 + + * - Dimension role + - Description + - Key (profile: :ref:`mars `) + - Key (profile: :ref:`grib `) + * - "number" + - metadata key interpreted as ensemble forecast members + - "number" + - "number" + * - "date" + - metadata key interpreted as date part of the "forecast_reference_time" + - "date" + - "date" + * - "time" + - metadata key interpreted as time part of the "forecast_reference_time" + - "time" + - "time" + * - "step" + - metadata key interpreted as forecast step + - "step_timedelta" + - "step_timedelta" + * - "forecast_reference_time" + - if not specified or None or empty the forecast reference time is built using the "date" and "time" roles + - None + - None + * - "valid_time" + - if not specified or None or empty the valid time is built using the "validityDate" and "validityTime" metadata keys + - None + - None + * - "level" + - metadata key interpreted as level + - "levelist" + - "level" + * - "level_type" + - metadata key interpreted as level type + - "levtype" + - "typeOfLevel" + +By default, the dimension names are the same as the role names. To use the associated metadata keys instead use the ``keep_dim_role_names=False`` option. + +the metadata keys.
However, this can be controlled with the ``keep_dim_role_names`` option. If set to ``True`` (the default), the dimension names will be the same as the dimension roles. This is useful when you want to use the dimension roles in your code, as they are more descriptive than the metadata keys. + +.. note:: + + For GRIB data, "step_timedelta" is a generated metadata key (by earthkit-data), which is the representation of the value of the "endStep" key as a `datetime.timedelta`. + + +Dimension modes +---------------------- + +The ``time_dim_mode`` and ``level_dim_mode`` options control how the temporal and vertical dimensions are generated in the Xarray dataset using ``dim_roles``. See the following notebooks for examples of how these modes work: + +``time_dim_mode``: + +- :ref:`/examples/xarray_engine_temporal.ipynb` +- :ref:`/examples/xarray_engine_seasonal.ipynb` + + +``level_dim_mode``: +- :ref:`/examples/xarray_engine_level.ipynb` + + +Squeezing/ensuring dimensions +---------------------------------- + +By default, the dimensions are squeezed. This means that if a dimension has only one value, it is removed from the dataset. This can be controlled with the ``squeeze`` option. Alternatively, the ``ensure_dims`` option can be used to ensure that certain dimensions are always present in the dataset, even if they have only one value. This is useful when you want to keep the dimensions for consistency or for further processing. + +See the following notebooks for examples of how this works: + +- :ref:`/examples/xarray_engine_squeeze.ipynb` + + +Extra dimensions +---------------------- + +The ``extra_dims`` option allows adding extra dimensions to the Xarray dataset on top of the predefined ones. E.g.
+ + + +Fixed dimensions +---------------------- + + +Split dimensions +---------------------- diff --git a/docs/guide/xarray/overview.rst b/docs/guide/xarray/overview.rst index aa8031cb6..1c9e726ee 100644 --- a/docs/guide/xarray/overview.rst +++ b/docs/guide/xarray/overview.rst @@ -25,12 +25,12 @@ We can convert :ref:`grib` data into an Xarray dataset by using :py:meth:`~data. Coordinates: * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202... * step (step) timedelta64[ns] 16B 00:00:00 06:00:00 - * levelist (levelist) int64 16B 500 700 + * level (level) int64 16B 500 700 * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0 * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0 Data variables: - r (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ... - t (forecast_reference_time, step, levelist, latitude, longitude) float64 88kB ... + r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ... + t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ... ... .. note:: @@ -51,6 +51,11 @@ We can also use the Xarray engine to read GRIB data directly with the :py:func:` Size: 176kB ... +Dimensions +++++++++++ + +The generated Xarray dataset will have the following dimensions: + Profiles +++++++++ diff --git a/docs/release_notes/version_0.15_updates.rst b/docs/release_notes/version_0.15_updates.rst index 8fe6c0ef8..45bd3d080 100644 --- a/docs/release_notes/version_0.15_updates.rst +++ b/docs/release_notes/version_0.15_updates.rst @@ -16,7 +16,30 @@ Xarray engine Breaking changes ------------------- -- Separated the dimension names from metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the dimension role, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". 
Previously, the dimension name was the name of the associated metadata key: e.g. "levtype" in the :ref:`default ` profile and "typeOfLevel" in the :ref:`grib ` profile. The old behaviour can still be invoked by using the newly added ``keep_dim_role_names=False`` option. +- Separated the dimension names from the metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the dimension role, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. it was "levtype" in the :ref:`default ` profile. The old behaviour can still be invoked by using the newly added ``keep_dim_role_names=False`` option. + +.. list-table:: Dimension roles and their associated metadata keys + :header-rows: 1 + + * - Dimension role + - Now + - Previously + * - "Dimension role" + - "level_type" + - "level_type" + * - "Dimension name" + - "stream" + * - "mars_class" + - "class" + * - "mars_typeOfLevel" + - "typeOfLevel" + * - "mars_level_type" + - "levtype" + * - "mars_step_timedelta" + - "endStep" + * - "mars_step" + + - The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datetime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that due to this change when ``keep_dim_role_names=False`` is used the step dimension will be called "step_timedelta" instead of "step".
diff --git a/src/earthkit/data/core/index.py b/src/earthkit/data/core/index.py index c13cf508d..66f2f1543 100644 --- a/src/earthkit/data/core/index.py +++ b/src/earthkit/data/core/index.py @@ -115,6 +115,7 @@ def compare_elements(self, a, b): for k, v in self.actions.items(): n = v(a_metadata(k, default=None), b_metadata(k, default=None)) + print(f"Comparing {k}: {a_metadata(k, default=None)} vs {b_metadata(k, default=None)} -> {n}") if n != 0: return n return 0 diff --git a/src/earthkit/data/utils/xarray/fieldlist.py b/src/earthkit/data/utils/xarray/fieldlist.py index 474cd62e5..c4d6c441e 100644 --- a/src/earthkit/data/utils/xarray/fieldlist.py +++ b/src/earthkit/data/utils/xarray/fieldlist.py @@ -7,7 +7,7 @@ # nor does it submit to any jurisdiction. # - +import datetime import logging from collections import defaultdict @@ -210,7 +210,7 @@ def unique_values(self, names, component=False): for k, v in vals.items(): v = [x for x in v if x is not None] - if all(isinstance(x, int) for x in v): + if all(isinstance(x, (int, datetime.timedelta)) for x in v): vals[k] = sorted(v) else: vals[k] = sorted(v, key=str) diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index 9560d720e..eb5d0ecb7 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -506,3 +506,73 @@ def test_xr_valid_time_coord(kwargs, dims, step_units, coords): assert ( ds[step_units[0]].attrs["units"] == step_units[1] ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims,step_units", + [ + ( + { + "time_dim_mode": "raw", + "keep_dim_role_names": True, + "ensure_dims": ["date", "time", "step"], + }, + { + "date": [np.datetime64("2011-12-15", "ns")], + "time": [np.timedelta64(12, "h")], + "step": [ + np.timedelta64(12, "h"), + np.timedelta64(18, "h"), + np.timedelta64(24, "h"), + np.timedelta64(30, "h"), + np.timedelta64(36, "h"), + ], + }, + None, + ), + ], +) 
+def test_xr_time_step_range_1(kwargs, dims, step_units): + ds_ek = from_source( + "url", earthkit_remote_test_data_file("test-data/xr_engine/date/wgust_step_range.grib1") + ) + + ds = ds_ek.to_xarray(**kwargs) + compare_dims(ds, dims, order_ref_var="10fg6") + + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims,step_units", + [ + ( + { + "time_dim_mode": "raw", + "keep_dim_role_names": True, + "ensure_dims": ["date", "time", "step"], + }, + { + "date": [np.datetime64("2025-05-27", "ns")], + "time": [np.timedelta64(0, "ns")], + "step": [np.timedelta64(72, "h")], + }, + None, + ), + ], +) +def test_xr_time_step_range_2(kwargs, dims, step_units): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/date/tp_step_range.grib2")) + + ds = ds_ek.to_xarray(**kwargs) + compare_dims(ds, dims, order_ref_var="lsp") + + if step_units is not None: + assert ( + ds[step_units[0]].attrs["units"] == step_units[1] + ), f"step units mismatch {ds[step_units[0]].attrs['units']} != {step_units[1]}" From 5ce22ffd804aceea1da8e17dd1178faf349846d2 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 13:34:20 +0100 Subject: [PATCH 5/9] Handle step range --- docs/examples/index.rst | 3 +- docs/examples/xarray_engine_ensemble.ipynb | 63 +- docs/examples/xarray_engine_seasonal.ipynb | 2402 ++++++++--------- docs/examples/xarray_engine_step_ranges.ipynb | 671 +++++ docs/guide/xarray/dim.rst | 6 +- docs/guide/xarray/overview.rst | 2 +- docs/release_notes/deprecations.rst | 28 + .../include/deprec_ens_dim_role.py | 7 + .../include/migrated_ens_dim_role.py | 7 + docs/release_notes/version_0.15_updates.rst | 43 +- src/earthkit/data/core/index.py | 1 - src/earthkit/data/utils/xarray/defaults.yaml | 2 +- src/earthkit/data/utils/xarray/dim.py | 10 +- 
src/earthkit/data/utils/xarray/engine.py | 69 +- src/earthkit/data/utils/xarray/profile.py | 2 +- tests/xr_engine/test_xr_attrs.py | 10 +- tests/xr_engine/test_xr_dims.py | 43 +- tests/xr_engine/test_xr_engine.py | 12 +- tests/xr_engine/test_xr_level.py | 42 +- tests/xr_engine/test_xr_remapping.py | 6 +- tests/xr_engine/test_xr_split.py | 10 +- tests/xr_engine/test_xr_time.py | 50 +- tests/xr_engine/test_xr_write.py | 2 +- 23 files changed, 2131 insertions(+), 1360 deletions(-) create mode 100644 docs/examples/xarray_engine_step_ranges.ipynb create mode 100644 docs/release_notes/include/deprec_ens_dim_role.py create mode 100644 docs/release_notes/include/migrated_ens_dim_role.py diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 68875b017..c945dddd4 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -158,13 +158,14 @@ Xarray engine xarray_engine_overview.ipynb xarray_engine_temporal.ipynb + xarray_engine_step_ranges.ipynb + xarray_engine_seasonal.ipynb xarray_engine_level.ipynb xarray_engine_ensemble.ipynb xarray_engine_variable_key.ipynb xarray_engine_field_dims.ipynb xarray_engine_to_grib.ipynb xarray_engine_split.ipynb - xarray_engine_seasonal.ipynb xarray_engine_squeeze.ipynb xarray_engine_chunks.ipynb diff --git a/docs/examples/xarray_engine_ensemble.ipynb b/docs/examples/xarray_engine_ensemble.ipynb index d75d1726a..c2c296da9 100644 --- a/docs/examples/xarray_engine_ensemble.ipynb +++ b/docs/examples/xarray_engine_ensemble.ipynb @@ -3,7 +3,13 @@ { "cell_type": "markdown", "id": "9ea9a922-c03f-43c1-aa83-b4ce321b75f5", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## Xarray engine: ensemble data" ] @@ -11,9 +17,15 @@ { "cell_type": "markdown", "id": "ef0e9584-da7b-4461-804e-5785e494485e", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "First, we get some ensemble 
forecast data and read it into a GRIB fieldlist." + "Get input GRIB ensemble forecast data." ] }, { @@ -31,7 +43,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "54e1f557c9b0480ba48f170084bede29", + "model_id": "fe83d176cacd410da62e44b2ac16e0b2", "version_major": 2, "version_minor": 0 }, @@ -53,14 +65,21 @@ "id": "b36e3ccd-d616-4b51-bfc5-8e42c34312d6", "metadata": {}, "source": [ - "The data contains 3 ensemble members: one control member and 2 perturbed members." + "The data contains 3 ensemble members: 1 control and 2 perturbed members." ] }, { "cell_type": "code", "execution_count": 2, - "id": "cea9edf5-8367-49a9-a029-05b9203f7abc", - "metadata": {}, + "id": "6d754b0e-6717-425a-9d48-0ab8ed20b171", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -217,7 +236,7 @@ "tags": [] }, "source": [ - "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the ensemble dimension is defined by the \"number\" :ref:`dimension role ` dimension role. By default, this role is using the \"number\" metadata key." + "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the ensemble dimension is defined by the \"number\" :ref:`dimension role `. By default, this role is using the \"number\" metadata key." ] }, { @@ -621,16 +640,16 @@ " domain: g\n", " levelist: 500\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • param :
    t
    paramId :
    130
    class :
    od
    stream :
    enfo
    levtype :
    pl
    type :
    cf
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    levelist :
    500
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 33kB\n", @@ -672,19 +691,19 @@ "id": "6f2a0d29-10bf-45ad-abf5-e457fd6f820d", "metadata": { "editable": true, - "raw_mimetype": "text/x-rst", + "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ - "This default can be overridden by specifying custom ``dim_roles``. E.g. to get the ensemble member number from the \"perturbatioNumber\" key we can use:" + "This default behaviour can be overridden by specifying custom ``dim_roles``. E.g. to get the ensemble member number from the \"perturbatioNumber\" key we can use:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "ba4a3746-3549-494b-8272-df0b03d6936c", "metadata": {}, "outputs": [ @@ -1083,16 +1102,16 @@ " number: 0\n", " levelist: 500\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • param :
    t
    paramId :
    130
    class :
    od
    stream :
    enfo
    levtype :
    pl
    type :
    cf
    expver :
    0001
    date :
    20240603
    time :
    0
    domain :
    g
    number :
    0
    levelist :
    500
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 33kB\n", @@ -1120,7 +1139,7 @@ " institution: ECMWF" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/examples/xarray_engine_seasonal.ipynb b/docs/examples/xarray_engine_seasonal.ipynb index 0727d75d5..52a0a0b1d 100644 --- a/docs/examples/xarray_engine_seasonal.ipynb +++ b/docs/examples/xarray_engine_seasonal.ipynb @@ -1,1204 +1,1204 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "55f1f7bf-9589-4a43-b246-7c4c7880fa2d", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" + "cells": [ + { + "cell_type": "markdown", + "id": "55f1f7bf-9589-4a43-b246-7c4c7880fa2d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: seasonal forecast" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7e0be52c-bedb-4ae7-984c-4807bf253d7f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "653a95e071ca4633aadbe42f597676a9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "seasonal_monthly.grib: 0%| | 0.00/160k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridTypeforecastMonth
    0lfpw2tsurface0199310010744fcmean0regular_ll1
    1lfpw2tsurface0199310010744fcmean1regular_ll1
    2lfpw2tsurface0199310010744fcmean2regular_ll1
    3lfpw2tsurface01993100101464fcmean0regular_ll2
    \n", + "" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", + "0 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "1 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "2 lfpw 2t surface 0 19931001 0 744 fcmean \n", + "3 lfpw 2t surface 0 19931001 0 1464 fcmean \n", + "\n", + " number gridType forecastMonth \n", + "0 0 regular_ll 1 \n", + "1 1 regular_ll 1 \n", + "2 2 regular_ll 1 \n", + "3 0 regular_ll 2 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fl[0:4].ls(extra_keys=\"forecastMonth\")" + ] + }, + { + "cell_type": "raw", + "id": "665fba14-79d5-4344-84fb-2e16da77936d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "In order to use ``forecastMonth`` instead of ``step`` we need to use the ``dim_roles`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c500c39a-8cdf-4e25-950e-581924879e6c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 395kB\n",
    +                            "Dimensions:                  (number: 3, forecast_reference_time: 4, step: 6,\n",
    +                            "                              latitude: 19, longitude: 36)\n",
    +                            "Coordinates:\n",
    +                            "  * number                   (number) int64 24B 0 1 2\n",
    +                            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    +                            "  * step                     (step) int64 48B 1 2 3 4 5 6\n",
    +                            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +                            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +                            "Data variables:\n",
    +                            "    2t                       (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n",
    +                            "Attributes: (12/15)\n",
    +                            "    param:        2t\n",
    +                            "    paramId:      167\n",
    +                            "    class:        c3\n",
    +                            "    stream:       msmm\n",
    +                            "    levtype:      sfc\n",
    +                            "    type:         fcmean\n",
    +                            "    ...           ...\n",
    +                            "    fcmonth:      1\n",
    +                            "    origin:       lfpw\n",
    +                            "    domain:       g\n",
    +                            "    method:       1\n",
    +                            "    Conventions:  CF-1.8\n",
    +                            "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 395kB\n", + "Dimensions: (number: 3, forecast_reference_time: 4, step: 6,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " 2t (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n", + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "e917dbc1-ba05-4180-b1d8-62e04bf98d50", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we check the \"step\" dimension we can see its units are \"months\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "850836de-db60-48ac-b42b-253ab335ceef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Size: 48B\n", + "array([1, 2, 3, 4, 5, 6])\n", + "Coordinates:\n", + " * step (step) int64 48B 1 2 3 4 5 6\n", + "Attributes:\n", + " units: months\n" + ] + } + ], + "source": [ + "print(ds[\"step\"])" + ] + }, + { + "cell_type": "raw", + "id": "13b0fdb1-ed1f-4a0a-b77f-71115adf40ad", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "By default, the dimensions related to dimension roles are named after the roles. So, although the step dimension was generated from the \"forecastMonth\" GRIB key the dimension name is still \"step\". To override this use the ``dim_name_from_role_name=False`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cbf6d822-546e-42ab-a8c7-ad8d7d0c61fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 395kB\n",
    +                            "Dimensions:                  (number: 3, forecast_reference_time: 4,\n",
    +                            "                              forecastMonth: 6, latitude: 19, longitude: 36)\n",
    +                            "Coordinates:\n",
    +                            "  * number                   (number) int64 24B 0 1 2\n",
    +                            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    +                            "  * forecastMonth            (forecastMonth) int64 48B 1 2 3 4 5 6\n",
    +                            "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    +                            "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    +                            "Data variables:\n",
    +                            "    2t                       (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n",
    +                            "Attributes: (12/15)\n",
    +                            "    param:        2t\n",
    +                            "    paramId:      167\n",
    +                            "    class:        c3\n",
    +                            "    stream:       msmm\n",
    +                            "    levtype:      sfc\n",
    +                            "    type:         fcmean\n",
    +                            "    ...           ...\n",
    +                            "    fcmonth:      1\n",
    +                            "    origin:       lfpw\n",
    +                            "    domain:       g\n",
    +                            "    method:       1\n",
    +                            "    Conventions:  CF-1.8\n",
    +                            "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 395kB\n", + "Dimensions: (number: 3, forecast_reference_time: 4,\n", + " forecastMonth: 6, latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * number (number) int64 24B 0 1 2\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", + " * forecastMonth (forecastMonth) int64 48B 1 2 3 4 5 6\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", + "Attributes: (12/15)\n", + " param: 2t\n", + " paramId: 167\n", + " class: c3\n", + " stream: msmm\n", + " levtype: sfc\n", + " type: fcmean\n", + " ... ...\n", + " fcmonth: 1\n", + " origin: lfpw\n", + " domain: g\n", + " method: 1\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", + " dim_roles={\"step\": \"forecastMonth\"}, \n", + " dim_name_from_role_name=False)\n", + "ds" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } }, - "tags": [] - }, - "source": [ - "## Xarray engine: seasonal forecast" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "7e0be52c-bedb-4ae7-984c-4807bf253d7f", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "653a95e071ca4633aadbe42f597676a9", - "version_major": 2, - 
"version_minor": 0 - }, - "text/plain": [ - "seasonal_monthly.grib: 0%| | 0.00/160k [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridTypeforecastMonth
    0lfpw2tsurface0199310010744fcmean0regular_ll1
    1lfpw2tsurface0199310010744fcmean1regular_ll1
    2lfpw2tsurface0199310010744fcmean2regular_ll1
    3lfpw2tsurface01993100101464fcmean0regular_ll2
    \n", - "" - ], - "text/plain": [ - " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", - "0 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "1 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "2 lfpw 2t surface 0 19931001 0 744 fcmean \n", - "3 lfpw 2t surface 0 19931001 0 1464 fcmean \n", - "\n", - " number gridType forecastMonth \n", - "0 0 regular_ll 1 \n", - "1 1 regular_ll 1 \n", - "2 2 regular_ll 1 \n", - "3 0 regular_ll 2 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl[0:4].ls(extra_keys=\"forecastMonth\")" - ] - }, - { - "cell_type": "raw", - "id": "665fba14-79d5-4344-84fb-2e16da77936d", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "In order to use ``forecastMonth`` instead of ``step`` we need to use the ``dim_roles`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c500c39a-8cdf-4e25-950e-581924879e6c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 395kB\n",
    -       "Dimensions:                  (number: 3, forecast_reference_time: 4, step: 6,\n",
    -       "                              latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * number                   (number) int64 24B 0 1 2\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    -       "  * step                     (step) int64 48B 1 2 3 4 5 6\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    2t                       (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n",
    -       "Attributes: (12/15)\n",
    -       "    param:        2t\n",
    -       "    paramId:      167\n",
    -       "    class:        c3\n",
    -       "    stream:       msmm\n",
    -       "    levtype:      sfc\n",
    -       "    type:         fcmean\n",
    -       "    ...           ...\n",
    -       "    fcmonth:      1\n",
    -       "    origin:       lfpw\n",
    -       "    domain:       g\n",
    -       "    method:       1\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 395kB\n", - "Dimensions: (number: 3, forecast_reference_time: 4, step: 6,\n", - " latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * number (number) int64 24B 0 1 2\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", - " * step (step) int64 48B 1 2 3 4 5 6\n", - " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", - "Data variables:\n", - " 2t (number, forecast_reference_time, step, latitude, longitude) float64 394kB ...\n", - "Attributes: (12/15)\n", - " param: 2t\n", - " paramId: 167\n", - " class: c3\n", - " stream: msmm\n", - " levtype: sfc\n", - " type: fcmean\n", - " ... ...\n", - " fcmonth: 1\n", - " origin: lfpw\n", - " domain: g\n", - " method: 1\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", - " dim_roles={\"step\": \"forecastMonth\"})\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "e917dbc1-ba05-4180-b1d8-62e04bf98d50", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "When we check the \"step\" dimension we can see its units are \"months\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "850836de-db60-48ac-b42b-253ab335ceef", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Size: 48B\n", - "array([1, 2, 3, 4, 5, 6])\n", - "Coordinates:\n", - " * step (step) int64 48B 1 2 3 4 5 6\n", - "Attributes:\n", - " units: months\n" - ] - } - ], - "source": [ - "print(ds[\"step\"])" - ] - }, - { - "cell_type": "raw", - "id": "13b0fdb1-ed1f-4a0a-b77f-71115adf40ad", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "By default, the dimensions related to dimension roles are named after the roles. So, although the step dimension was generated from the \"forecastMonth\" GRIB key the dimension name is still \"step\". To override this use the ``keep_dim_role_name=False`` option in :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "cbf6d822-546e-42ab-a8c7-ad8d7d0c61fc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 395kB\n",
    -       "Dimensions:                  (number: 3, forecast_reference_time: 4,\n",
    -       "                              forecastMonth: 6, latitude: 19, longitude: 36)\n",
    -       "Coordinates:\n",
    -       "  * number                   (number) int64 24B 0 1 2\n",
    -       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 199...\n",
    -       "  * forecastMonth            (forecastMonth) int64 48B 1 2 3 4 5 6\n",
    -       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
    -       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
    -       "Data variables:\n",
    -       "    2t                       (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n",
    -       "Attributes: (12/15)\n",
    -       "    param:        2t\n",
    -       "    paramId:      167\n",
    -       "    class:        c3\n",
    -       "    stream:       msmm\n",
    -       "    levtype:      sfc\n",
    -       "    type:         fcmean\n",
    -       "    ...           ...\n",
    -       "    fcmonth:      1\n",
    -       "    origin:       lfpw\n",
    -       "    domain:       g\n",
    -       "    method:       1\n",
    -       "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " - ], - "text/plain": [ - " Size: 395kB\n", - "Dimensions: (number: 3, forecast_reference_time: 4,\n", - " forecastMonth: 6, latitude: 19, longitude: 36)\n", - "Coordinates:\n", - " * number (number) int64 24B 0 1 2\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 199...\n", - " * forecastMonth (forecastMonth) int64 48B 1 2 3 4 5 6\n", - " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", - " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", - "Data variables:\n", - " 2t (number, forecast_reference_time, forecastMonth, latitude, longitude) float64 394kB ...\n", - "Attributes: (12/15)\n", - " param: 2t\n", - " paramId: 167\n", - " class: c3\n", - " stream: msmm\n", - " levtype: sfc\n", - " type: fcmean\n", - " ... ...\n", - " fcmonth: 1\n", - " origin: lfpw\n", - " domain: g\n", - " method: 1\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(time_dim_mode=\"forecast\", \n", - " dim_roles={\"step\": \"forecastMonth\"}, \n", - " keep_dim_role_names=False)\n", - "ds" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/xarray_engine_step_ranges.ipynb b/docs/examples/xarray_engine_step_ranges.ipynb new file mode 100644 index 000000000..cef1aa2ba --- /dev/null +++ b/docs/examples/xarray_engine_step_ranges.ipynb @@ -0,0 +1,671 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "415a2c2a-8d00-48fc-9a02-6fc79aac663f", + "metadata": { + 
"editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: step range" + ] + }, + { + "cell_type": "markdown", + "id": "b8e40382-ef81-46d7-8c94-2a01bd3a5214", + "metadata": {}, + "source": [ + "Get input GRIB2 data containing precipitation forecast for step ranges." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "85d04283-3488-477f-90dd-ee27f0a91935", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "936ac87a643d414781370a22f10a9904", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "lsp_step_range.grib2: 0%| | 0.00/1.17k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    paramstepstepRangestartStependStep
    0lsp71-7271-727172
    1lsp72-7372-737273
    \n", + "" + ], + "text/plain": [ + " param step stepRange startStep endStep\n", + "0 lsp 71-72 71-72 71 72\n", + "1 lsp 72-73 72-73 72 73" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "ds_fl = ekd.from_source(\"sample\", \"lsp_step_range.grib2\")\n", + "ds_fl.ls(keys=[\"param\", \"step\", \"stepRange\", \"startStep\", \"endStep\"])" + ] + }, + { + "cell_type": "raw", + "id": "b2fab96a-8435-4ed3-b43d-e7f5dcc27141", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When we convert GRIB data to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` the step dimension is defined by the \"step\" :ref:`dimension role `. By default, this role is using the \"step_timedelta\" generated metadata key that is the timedelta representation of the \"endStep\" GRIB key." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5b6872c9-97b6-4336-89ba-4e6491605f90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 2kB\n",
    +       "Dimensions:    (step: 2, latitude: 7, longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * step       (step) timedelta64[ns] 16B 3 days 3 days 01:00:00\n",
    +       "  * latitude   (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude  (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    lsp        (step, latitude, longitude) float64 1kB ...\n",
    +       "Attributes:\n",
    +       "    param:        lsp\n",
    +       "    paramId:      142\n",
    +       "    class:        d1\n",
    +       "    stream:       oper\n",
    +       "    levtype:      sfc\n",
    +       "    type:         fc\n",
    +       "    expver:       0001\n",
    +       "    date:         20250527\n",
    +       "    time:         0\n",
    +       "    domain:       g\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (step: 2, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * step (step) timedelta64[ns] 16B 3 days 3 days 01:00:00\n", + " * latitude (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n", + " * longitude (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n", + "Data variables:\n", + " lsp (step, latitude, longitude) float64 1kB ...\n", + "Attributes:\n", + " param: lsp\n", + " paramId: 142\n", + " class: d1\n", + " stream: oper\n", + " levtype: sfc\n", + " type: fc\n", + " expver: 0001\n", + " date: 20250527\n", + " time: 0\n", + " domain: g\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray()\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "5e0f85a6-30bd-4dfe-ae98-b9c13304a465", + "metadata": {}, + "source": [ + "We can check the \"step\" coordinate in the dataset to see that it matches the \"endStep\" values." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f9a4b868-29dd-4bb1-bbaa-18caa68f405e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[72, 73]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert to hours from ns\n", + "[int(x* 1E-9/(3600)) for x in ds[\"step\"].values]" + ] + }, + { + "cell_type": "markdown", + "id": "542a047c-39d8-4ec1-9194-bb362e9de4f7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This default behaviour can be overridden by specifying custom ``dim_roles``. E.g. 
to get the step from the \"startStep\" key we can use:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5ea06ff7-70c6-4967-80ce-7b7b6fa12fa5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[71, 72]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_fl.to_xarray(dim_roles={\"step\": \"startStep\"})\n", + "[int(x* 1E-9/(3600)) for x in ds[\"step\"].values]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "495387d6-331c-4dc5-90fa-e05a6da9b998", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/guide/xarray/dim.rst b/docs/guide/xarray/dim.rst index 2e1c39904..ef5f8adce 100644 --- a/docs/guide/xarray/dim.rst +++ b/docs/guide/xarray/dim.rst @@ -3,7 +3,7 @@ Dimensions ================== -One of the most important aspect of the :ref:`xr_engine` is how it generates dimensions in the Xarray dataset. +One of the most important aspects of the :ref:`xr_engine` is how it generates dimensions in the Xarray dataset with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. .. _xr_dim_roles: .. _xr_predefined_dims: @@ -61,9 +61,9 @@ The possible roles are as follows: - "levtype" - "typeOfLevel" -By default, the dimension names are the same as the role names. To use the associated metadata keys instead use the ``keep_dim_role_names=False`` option. +By default, the dimension names are the same as the role names.
To use the associated metadata keys instead, use the ``dim_name_from_role_name=False`` option. -the metadata keys. However, this can be controlled with the ``keep_dim_role_names`` option. If set to ``False``, the dimension names will be the same as the dimension roles. This is useful when you want to use the dimension roles in your code, as they are more descriptive than the metadata keys. +This behaviour is controlled with the ``dim_name_from_role_name`` option. If set to ``False``, the dimension names will be the same as the metadata keys associated with the dimension roles. The default (``True``) is useful when you want to use the dimension roles in your code, as they are more descriptive than the metadata keys. .. note:: diff --git a/docs/guide/xarray/overview.rst b/docs/guide/xarray/overview.rst index 1c9e726ee..57af5d94e 100644 --- a/docs/guide/xarray/overview.rst +++ b/docs/guide/xarray/overview.rst @@ -54,7 +54,7 @@ We can also use the Xarray engine to read GRIB data directly with the :py:func:` Dimensions ++++++++++ -The generated Xarray dataset will have the following dimensions: +The pivotal question when generating the Xarray dataset is how to form the dimensions. The :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray` method has a number of options to control the dimensions. Please see more details in the :ref:`dimensions ` section. Profiles diff --git a/docs/release_notes/deprecations.rst b/docs/release_notes/deprecations.rst index 6d142fd63..110bab34e 100644 --- a/docs/release_notes/deprecations.rst +++ b/docs/release_notes/deprecations.rst @@ -1,6 +1,34 @@ Deprecations ============= + +.. 
_deprecated-ens-dim-role: + +The "ens" dimension role has been renamed to "number" +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +The name of the ensemble member :ref:`dimension role <xr_dim_roles>` changed to "number" from "ens" in the ``dim_roles`` option of :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. The old name is still available for backward compatibility but will be removed in a future release. + +.. list-table:: + :header-rows: 0 + + * - Deprecated code + * - + + .. literalinclude:: include/deprec_ens_dim_role.py + + * - New code + * - + + .. literalinclude:: include/migrated_ens_dim_role.py + + + .. _deprecated-0.13.0: Version 0.13.0 diff --git a/docs/release_notes/include/deprec_ens_dim_role.py b/docs/release_notes/include/deprec_ens_dim_role.py new file mode 100644 index 000000000..965e63215 --- /dev/null +++ b/docs/release_notes/include/deprec_ens_dim_role.py @@ -0,0 +1,7 @@ +import earthkit.data as ekd + +ds_fl = ekd.from_source("sample", "ens_cf_pf.grib") + +ds = ds_fl.to_xarray( + dim_roles={"ens": "perturbationNumber"}, +) diff --git a/docs/release_notes/include/migrated_ens_dim_role.py b/docs/release_notes/include/migrated_ens_dim_role.py new file mode 100644 index 000000000..1c98f6af8 --- /dev/null +++ b/docs/release_notes/include/migrated_ens_dim_role.py @@ -0,0 +1,7 @@ +import earthkit.data as ekd + +ds_fl = ekd.from_source("sample", "ens_cf_pf.grib") + +ds = ds_fl.to_xarray( + dim_roles={"number": "perturbationNumber"}, +) diff --git a/docs/release_notes/version_0.15_updates.rst b/docs/release_notes/version_0.15_updates.rst index 45bd3d080..9a498bd7e 100644 --- a/docs/release_notes/version_0.15_updates.rst +++ b/docs/release_notes/version_0.15_updates.rst @@ -8,7 +8,7 @@ Version 0.15.0 Deprecations +++++++++++++++++++ -- :ref:`deprecated-ens-role` +- :ref:`deprecated-ens-dim-role` Xarray engine ++++++++++++++++++++++++++++++ @@ -16,45 +16,24 @@ Xarray engine Breaking changes ------------------- -- Separated 
the dimension names from the metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the dimension role, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. it was "levtype" in the :ref:`default ` profile. The old behaviour can still be invoked by using the newly added ``keep_dim_role_names=False`` option. +- Separated the dimension names from the metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the :ref:`dimension role <xr_dim_roles>`, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. it was "levtype" in the :ref:`default ` profile. The old behaviour can still be invoked by using the newly added ``dim_name_from_role_name=False`` option. See: :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. -.. list-table:: Dimension roles and their associated metadata keys - :header-rows: 1 - * - Dimension role - - Pow - - Previously - * - Dimension role" - - "level_type" - - "level_type" - * - "Dimension name" - - "stream" - * - "mars_class" - - "class" - * - "mars_typeOfLevel" - - "typeOfLevel" - * - "mars_level_type" - - "levtype" - * - "mars_step_timedelta" - - "endStep" - * - "mars_step" - - -- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datatime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that due to this change when ``keep_dim_role_names=False`` is used the step dimension will be called "step_timedelta" instead of "step". 
+- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datetime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that, due to this change, when ``dim_name_from_role_name=False`` is used the step dimension will be called "step_timedelta" instead of "step". Other changes ------------------- -- Allowed using mappings in the ``ensure_dims``, ``extra_dims`` and ``fixed_dims`` options to define both the name of the dimensions and the metadata keys to generate their values. Previously, these options only took a single/multiple metadata keys. E.g. both the options below will generate the "expver", "mars_stream" and "mars_class" dimensions using the "expver", "stream" and "class" metadata keys. +- Allowed using mappings in the ``extra_dims`` and ``fixed_dims`` options to define both the name of the dimensions and the metadata keys to generate their values. Previously, these options only took a single metadata key or a list of metadata keys. E.g. both the options below will generate the "expver", "mars_stream" and "mars_class" dimensions using the "expver", "stream" and "class" metadata keys. .. code-block:: python - extra_dims = ["expver", {"mars_stream": "stream"}, ("mars_class", "endStep")] + extra_dims = ["expver", {"mars_stream": "stream"}, ("mars_class", "class")] extra_dims = { "expver": "expver", "mars_stream": "stream", - "mars_class": "endStep", + "mars_class": "class", } @@ -62,6 +41,16 @@ Other changes - TensorBackendArray, which implements the lazy loading of DataArrays in the Xarray engine, now uses a ``dask.utils.SerializableLock`` when accessing the data (:pr:`700`). - Enabled converting :ref:`data-sources-lod` fieldlists into Xarray (:pr:`701`). See the :ref:`/examples/list_of_dicts_to_xarray.ipynb` notebook example. 
+New Xarray engine notebooks +------------------------------ + +- :ref:`/examples/xr_engine_step_range.ipynb` +- :ref:`/examples/xr_engine_ensemble.ipynb` +- :ref:`/examples/xr_engine_squeeze.ipynb` +- :ref:`/examples/xarray_engine_chunks.ipynb` +- :ref:`/examples/list_of_dicts_to_xarray.ipynb` + + New features +++++++++++++++++ diff --git a/src/earthkit/data/core/index.py b/src/earthkit/data/core/index.py index 66f2f1543..c13cf508d 100644 --- a/src/earthkit/data/core/index.py +++ b/src/earthkit/data/core/index.py @@ -115,7 +115,6 @@ def compare_elements(self, a, b): for k, v in self.actions.items(): n = v(a_metadata(k, default=None), b_metadata(k, default=None)) - print(f"Comparing {k}: {a_metadata(k, default=None)} vs {b_metadata(k, default=None)} -> {n}") if n != 0: return n return 0 diff --git a/src/earthkit/data/utils/xarray/defaults.yaml b/src/earthkit/data/utils/xarray/defaults.yaml index d2316340c..0aaa1ae9f 100644 --- a/src/earthkit/data/utils/xarray/defaults.yaml +++ b/src/earthkit/data/utils/xarray/defaults.yaml @@ -52,7 +52,7 @@ dim_roles: level: level level_type: typeOfLevel -keep_dim_role_names: true +dim_name_from_role_name: true coord_attrs: latitude: diff --git a/src/earthkit/data/utils/xarray/dim.py b/src/earthkit/data/utils/xarray/dim.py index 36951acce..9fd9c8cfa 100644 --- a/src/earthkit/data/utils/xarray/dim.py +++ b/src/earthkit/data/utils/xarray/dim.py @@ -592,7 +592,7 @@ def __init__( split_dims, rename_dims, dim_roles, - keep_dim_role_names, + dim_name_from_role_name, dims_as_attrs, time_dim_mode, level_dim_mode, @@ -601,8 +601,8 @@ def __init__( self.profile = profile - self.dim_roles = DimRole(dim_roles, name_as_key=keep_dim_role_names) - # self.keep_dim_role_names = keep_dim_role_names + self.dim_roles = DimRole(dim_roles, name_as_key=dim_name_from_role_name) + # self.dim_name_from_role_name = dim_name_from_role_name self.extra_dims = ensure_dim_map(extra_dims) self.drop_dims = ensure_iterable(drop_dims) self.ensure_dims = 
ensure_iterable(ensure_dims) @@ -618,7 +618,7 @@ def __init__( # Warning.deprecated("'ens' key in dim_roles is deprecated. Use 'number' instead") # self.dim_roles["number"] = self.dim_roles.pop("ens") - # if self.keep_dim_role_names: + # if self.dim_name_from_role_name: # d = {v: k for k, v in self.dim_roles.items()} # for k in list(self.rename_dims_map.keys()): # if k in self.dim_roles: @@ -678,7 +678,7 @@ def __init__( # self.rename_dims_map[d.key] = d.name # else d - # if self.keep_dim_role_names: + # if self.dim_name_from_role_name: # d = {v: k for k, v in self.dim_roles.items()} # for k in list(self.rename_dims_map.keys()): # if k in self.dim_roles: diff --git a/src/earthkit/data/utils/xarray/engine.py b/src/earthkit/data/utils/xarray/engine.py index f1288b190..a73c7ba04 100644 --- a/src/earthkit/data/utils/xarray/engine.py +++ b/src/earthkit/data/utils/xarray/engine.py @@ -29,7 +29,7 @@ def open_dataset( ensure_dims=None, fixed_dims=None, dim_roles=None, - keep_dim_role_names=None, + dim_name_from_role_name=None, rename_dims=None, dims_as_attrs=None, time_dim_mode=None, @@ -72,18 +72,61 @@ def open_dataset( rename_variables: dict, None Mapping to rename variables. Default is None. extra_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be used as additional dimensions on top of the - predefined dimensions. Only enabled when no ``fixed_dims`` is specified. Default is None. + Define additional dimensions on top of the predefined dimensions. Only enabled when no ``fixed_dims`` + is specified. Default is None. It can be a single item or a list. Each item is either a metadata key, or + a dict/tuple defining mapping between the dimension name and the metadata key. The whole option can be a + dict. E.g. + + .. 
code-block:: python + + # use key "expver" as a dimension + extra_dims = "expver" + # use keys "expver" and "stream" as a dimension + extra_dims = ["expver", "stream"] + # define dimensions "expver", "mars_stream" and "mars_type" from + # metadata keys "expver", "stream" and "type" + extra_dims = [ + "expver", + {"mars_stream": "stream"}, + ("mars_type", "type"), + ] + extra_dims = [ + { + "expver": "expver", + "mars_stream": "stream", + "mars_type": "type", + } + ] + drop_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be ignored as dimensions. Default is None. + Single or multiple dimensions to be ignored. Default is None. ensure_dims: str, or iterable of str, None - Metadata key or list of metadata keys that should be used as dimensions even - when ``squeeze=True``. Default is None. + Dimension or dimensions that should be kept even when ``squeeze=True`` and their size + is only 1. Default is None. fixed_dims: str, or iterable of str, None - Metadata key or list of metadata keys in the order they should be used as dimensions. When - defined no other dimensions will be used. Might be incompatible with other settings. - Default is None. + Define all the dimensions to be generated. When used no other dimensions will be created. + Might be incompatible with other settings. Default is None. It can be a single item or a list. + Each item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + ..
code-block:: python + + # use key "step" as a dimension + fixed_dims = "step" + # use keys "step" and "levelist" as a dimension + fixed_dims = ["step", "levelist"] + # define dimensions "step", "level" and "level_type" from + # metadata keys "step", "levelist" and "levtype" + fixed_dims = [ + "step", + {"level": "levelist"}, + ("level_type", "levtype"), + ] + fixed_dims = [ + {"step": "step", "level": "levelist", "level_type": "levtype"} + ] + dim_roles: dict, None Specify the "roles" used to form the predefined dimensions. The predefined dimensions are automatically generated when no ``fixed_dims`` specified and comprise the following @@ -96,7 +139,7 @@ def open_dataset( ``dim_roles`` is a mapping between the "roles" and the metadata keys representing the roles. The possible roles are as follows: - - "ens": metadata key interpreted as ensemble forecast members + - "number": metadata key interpreted as ensemble forecast members - "date": metadata key interpreted as date part of the "forecast_reference_time" - "time": metadata key interpreted as time part of the "forecast_reference_time" - "step": metadata key interpreted as forecast step @@ -112,7 +155,7 @@ def open_dataset( .. code-block:: python { - "ens": "number", + "number": "number", "date": "dataDate", "time": "dataTime", "step": "step", @@ -124,7 +167,7 @@ def open_dataset( ``dims_roles`` behaves differently to the other kwargs in the sense that it does not override but update the default values. So e.g. to change only "ens" in - the defaults it is enough to specify: "dim_roles={"ens": "perturbationNumber"}. + the defaults it is enough to specify: "dim_roles={"number": "perturbationNumber"}. rename_dims: dict, None Mapping to rename dimensions. Default is None.
dims_as_attrs: str, or iterable of str, None @@ -267,7 +310,7 @@ def open_dataset( fixed_dims=fixed_dims, rename_dims=rename_dims, dim_roles=dim_roles, - keep_dim_role_names=keep_dim_role_names, + dim_name_from_role_name=dim_name_from_role_name, dims_as_attrs=dims_as_attrs, time_dim_mode=time_dim_mode, level_dim_mode=level_dim_mode, diff --git a/src/earthkit/data/utils/xarray/profile.py b/src/earthkit/data/utils/xarray/profile.py index 41a2e078c..bdf8b60f8 100644 --- a/src/earthkit/data/utils/xarray/profile.py +++ b/src/earthkit/data/utils/xarray/profile.py @@ -125,7 +125,7 @@ def __init__( kwargs.pop("split_dims"), kwargs.pop("rename_dims"), kwargs.pop("dim_roles"), - kwargs.pop("keep_dim_role_names"), + kwargs.pop("dim_name_from_role_name"), kwargs.pop("dims_as_attrs"), kwargs.pop("time_dim_mode"), kwargs.pop("level_dim_mode"), diff --git a/tests/xr_engine/test_xr_attrs.py b/tests/xr_engine/test_xr_attrs.py index 03a404fa4..5639ecd72 100644 --- a/tests/xr_engine/test_xr_attrs.py +++ b/tests/xr_engine/test_xr_attrs.py @@ -48,7 +48,7 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], @@ -67,7 +67,7 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": True, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], @@ -86,7 +86,7 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], @@ -105,7 +105,7 @@ def _get_attrs_for_key_2(key, metadata): "decode_times": False, "decode_timedelta": False, "strict": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], @@ -156,7 +156,7 @@ def 
test_xr_dims_as_attrs(kwargs, coords, dims, attrs): "decode_times": False, "decode_timedelta": False, "strict": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [20240603, 20240604], diff --git a/tests/xr_engine/test_xr_dims.py b/tests/xr_engine/test_xr_dims.py index 922cfa9d7..d2fb5dc97 100644 --- a/tests/xr_engine/test_xr_dims.py +++ b/tests/xr_engine/test_xr_dims.py @@ -119,13 +119,13 @@ def test_xr_dims_input_fieldlist(): [ ({}, "param", ["r", "t"], ["step_timedelta", "levelist"]), ( - {"time_dim_mode": "forecast", "keep_dim_role_names": False}, + {"time_dim_mode": "forecast", "dim_name_from_role_name": False}, "param", ["r", "t"], ["step_timedelta", "levelist"], ), ( - {"squeeze": False, "time_dim_mode": "raw", "keep_dim_role_names": False}, + {"squeeze": False, "time_dim_mode": "raw", "dim_name_from_role_name": False}, "param", ["r", "t"], ["time", "step_timedelta", "levelist"], @@ -149,13 +149,13 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): "kwargs,var_key,variables,dims", [ ( - {"time_dim_mode": "forecast", "keep_dim_role_names": False}, + {"time_dim_mode": "forecast", "dim_name_from_role_name": False}, "param", ["r", "t"], ["forecast_reference_time", "step_timedelta", "levelist", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "param_level", "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "variable_key": "param_level", "dim_name_from_role_name": False}, "param_level", ["r1000", "r850", "t1000", "t850"], ["date", "time", "step_timedelta", "levtype"], @@ -165,14 +165,14 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): "time_dim_mode": "raw", "variable_key": "param_level", "remapping": {"param_level": "{param}_{level}"}, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, "param_level", ["r_1000", "r_850", "t_1000", "t_850"], ["date", "time", "step_timedelta", "levtype"], ), ( - {"time_dim_mode": "raw", "variable_key": "shortName", 
"keep_dim_role_names": False}, + {"time_dim_mode": "raw", "variable_key": "shortName", "dim_name_from_role_name": False}, "shortName", ["r", "t"], ["date", "time", "step_timedelta", "levelist", "levtype"], @@ -182,7 +182,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): "time_dim_mode": "raw", "variable_key": "shortName", "drop_variables": ["r"], - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, "shortName", ["t"], @@ -193,7 +193,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): "time_dim_mode": "raw", "variable_key": "param_level", "drop_variables": ["r", "r1000"], - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, "param_level", ["r850", "t1000", "t850"], @@ -205,7 +205,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): ], ), ( - {"time_dim_mode": "raw", "level_dim_mode": "level_and_type", "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "level_dim_mode": "level_and_type", "dim_name_from_role_name": False}, "param", ["r", "t"], { @@ -216,7 +216,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): }, ), ( - {"time_dim_mode": "raw", "extra_dims": "class", "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "extra_dims": "class", "dim_name_from_role_name": False}, "param", ["r", "t"], { @@ -229,7 +229,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): }, ), ( - {"time_dim_mode": "raw", "ensure_dims": "class", "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "ensure_dims": "class", "dim_name_from_role_name": False}, "param", ["r", "t"], { @@ -242,7 +242,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): }, ), ( - {"time_dim_mode": "raw", "ensure_dims": ["class", "step"], "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "ensure_dims": ["class", "step"], "dim_name_from_role_name": False}, "param", ["r", "t"], { @@ -259,7 +259,7 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): { 
"time_dim_mode": "raw", "ensure_dims": ["class", "step_timedelta"], - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, "param", ["r", "t"], @@ -273,7 +273,12 @@ def test_xr_dims_ds_lev(kwargs, var_key, variables, dim_keys): }, ), ( - {"time_dim_mode": "raw", "extra_dims": "class", "squeeze": False, "keep_dim_role_names": False}, + { + "time_dim_mode": "raw", + "extra_dims": "class", + "squeeze": False, + "dim_name_from_role_name": False, + }, "param", ["r", "t"], { @@ -349,7 +354,7 @@ def test_xr_dims_ds_sfc_and_pl(kwargs, var_key, variables, dim_keys): "profile": "mars", "time_dim_mode": "raw", "rename_dims": {"levelist": "zz"}, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, ["date", "time", "step_timedelta", "zz"], ), @@ -438,7 +443,7 @@ def test_xr_fixed_dims(kwargs, dim_keys): "drop_dims": "number", "time_dim_mode": "raw", "squeeze": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, ["date", "time", "step", "level", "level_type"], ), @@ -448,7 +453,7 @@ def test_xr_fixed_dims(kwargs, dim_keys): "drop_dims": ["level_type", "number"], "time_dim_mode": "raw", "squeeze": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, ["date", "time", "step", "level"], ), @@ -458,7 +463,7 @@ def test_xr_fixed_dims(kwargs, dim_keys): "drop_dims": "number", "time_dim_mode": "raw", "squeeze": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, ["date", "time", "step_timedelta", "levelist", "levtype"], ), @@ -468,7 +473,7 @@ def test_xr_fixed_dims(kwargs, dim_keys): "drop_dims": ["levtype", "number"], "time_dim_mode": "raw", "squeeze": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, ["date", "time", "step_timedelta", "levelist"], ), diff --git a/tests/xr_engine/test_xr_engine.py b/tests/xr_engine/test_xr_engine.py index 2c40411e7..7c574b61a 100644 --- a/tests/xr_engine/test_xr_engine.py +++ b/tests/xr_engine/test_xr_engine.py @@ -73,7 
+73,7 @@ def test_xr_engine_detailed_check_1(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, - keep_dim_role_names=False, + dim_name_from_role_name=False, ) else: import xarray as xr @@ -85,7 +85,7 @@ def test_xr_engine_detailed_check_1(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, - keep_dim_role_names=False, + dim_name_from_role_name=False, ) assert ds is not None @@ -250,7 +250,7 @@ def test_xr_engine_detailed_check_2(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, - keep_dim_role_names=True, + dim_name_from_role_name=True, ) else: import xarray as xr @@ -262,7 +262,7 @@ def test_xr_engine_detailed_check_2(api): decode_times=False, decode_timedelta=False, add_valid_time_coord=False, - keep_dim_role_names=True, + dim_name_from_role_name=True, ) assert ds is not None @@ -436,7 +436,7 @@ def test_xr_engine_detailed_flatten_check_1(stream, lazy_load, release_source, d "lazy_load": lazy_load, "release_source": release_source, "direct_backend": direct_backend, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, } } } @@ -615,7 +615,7 @@ def test_xr_engine_detailed_flatten_check_2(stream, lazy_load, release_source, d "lazy_load": lazy_load, "release_source": release_source, "direct_backend": direct_backend, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, } } } diff --git a/tests/xr_engine/test_xr_level.py b/tests/xr_engine/test_xr_level.py index 1e0b36591..6d4260acb 100644 --- a/tests/xr_engine/test_xr_level.py +++ b/tests/xr_engine/test_xr_level.py @@ -28,11 +28,11 @@ "kwargs,dims", [ ( - {"profile": "mars", "level_dim_mode": "level", "keep_dim_role_names": False}, + {"profile": "mars", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, ), ( - {"profile": "mars", "level_dim_mode": "level_and_type", "keep_dim_role_names": False}, + {"profile": "mars", "level_dim_mode": "level_and_type", 
"dim_name_from_role_name": False}, {"level_and_type": ["1000pl", "300pl", "400pl", "500pl", "700pl", "850pl"]}, ), ], @@ -50,7 +50,7 @@ def test_xr_level_dim(kwargs, dims): [ ( "pl.grib", - {"profile": "grib", "level_dim_mode": "level", "keep_dim_role_names": False}, + {"profile": "grib", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"level": [300, 400, 500, 700, 850, 1000]}, "isobaricInhPa", ), @@ -60,7 +60,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [80]}, "isobaricInPa", @@ -71,7 +71,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [0.01, 0.1, 1]}, "pl", @@ -82,7 +82,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [100, 1000, 2000, 3000]}, "heightAboveSea", @@ -93,7 +93,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [500, 1000, 2500, 10000]}, "heightAboveGround", @@ -104,7 +104,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [320]}, "theta", @@ -115,7 +115,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [1500]}, "potentialVorticity", @@ -126,7 +126,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": 
False, + "dim_name_from_role_name": False, }, {"level": [7]}, "depthBelowLand", @@ -137,7 +137,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [3]}, "snowLayer", @@ -148,7 +148,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [77]}, "hybrid", @@ -159,7 +159,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [0]}, "surface", @@ -176,7 +176,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [0]}, "meanSea", @@ -187,14 +187,14 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "level", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"level": [1]}, "generalVerticalLayer", ), ( "pl.grib", - {"profile": "mars", "level_dim_mode": "level", "keep_dim_role_names": False}, + {"profile": "mars", "level_dim_mode": "level", "dim_name_from_role_name": False}, {"levelist": [300, 400, 500, 700, 850, 1000]}, "pl", ), @@ -204,7 +204,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [0.8]}, "pl", @@ -215,7 +215,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [320]}, "pt", @@ -226,7 +226,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "mars", 
"level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [1500]}, "pv", @@ -237,7 +237,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "grib", "level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [3]}, "sol", @@ -248,7 +248,7 @@ def test_xr_level_dim(kwargs, dims): "profile": "mars", "level_dim_mode": "level", "ensure_dims": "levelist", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"levelist": [0.01, 0.1, 1]}, "pl", diff --git a/tests/xr_engine/test_xr_remapping.py b/tests/xr_engine/test_xr_remapping.py index b491071ec..3011c0df9 100644 --- a/tests/xr_engine/test_xr_remapping.py +++ b/tests/xr_engine/test_xr_remapping.py @@ -46,7 +46,7 @@ def test_xr_remapping_1(): dim_roles={"level": "_k"}, level_dim_mode="level", remapping={"_k": "{levelist}_{levtype}"}, - keep_dim_role_names=False, + dim_name_from_role_name=False, ), {"_k": ["500_pl", "700_pl"]}, {"forecast_reference_time": 4, "step_timedelta": 2, "_k": 2, "latitude": 19, "longitude": 36}, @@ -56,7 +56,7 @@ def test_xr_remapping_1(): dim_roles={"level": "_k"}, level_dim_mode="level", remapping={"_k": "{levelist}_{levtype}"}, - keep_dim_role_names=True, + dim_name_from_role_name=True, ), {"level": ["500_pl", "700_pl"]}, {"forecast_reference_time": 4, "step": 2, "level": 2, "latitude": 19, "longitude": 36}, @@ -67,7 +67,7 @@ def test_xr_remapping_1(): level_dim_mode="level", remapping={"_k": "{levelist}_{levtype}"}, rename_dims={"level": "_k"}, - keep_dim_role_names=True, + dim_name_from_role_name=True, ), {"_k": ["500_pl", "700_pl"]}, {"forecast_reference_time": 4, "step": 2, "_k": 2, "latitude": 19, "longitude": 36}, diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index db8c2d4e9..af537cb9a 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -22,7 
+22,7 @@ [ ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"], "keep_dim_role_names": False}, + {"time_dim_mode": "raw", "split_dims": ["step"], "dim_name_from_role_name": False}, 2, ["2t", "msl", "r", "t"], ["date", "time", "levelist"], @@ -34,7 +34,7 @@ "time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, 2, ["2t", "msl", "r", "t"], @@ -46,7 +46,7 @@ { "time_dim_mode": "valid_time", "split_dims": ["stream", "dataType", "edition", "Ni"], - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, 11, None, @@ -67,7 +67,7 @@ ), ( ["level", "pl.grib"], - {"time_dim_mode": "raw", "split_dims": ["step"], "keep_dim_role_names": True}, + {"time_dim_mode": "raw", "split_dims": ["step"], "dim_name_from_role_name": True}, 2, ["2t", "msl", "r", "t"], ["date", "time", "level"], @@ -79,7 +79,7 @@ "time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step", - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, 2, ["2t", "msl", "r", "t"], diff --git a/tests/xr_engine/test_xr_time.py b/tests/xr_engine/test_xr_time.py index eb5d0ecb7..ff8ebd5b4 100644 --- a/tests/xr_engine/test_xr_time.py +++ b/tests/xr_engine/test_xr_time.py @@ -33,7 +33,7 @@ "time_dim_mode": "raw", "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, {"date": [20240603, 20240604], "time": [0, 1200], "step": [0, 6]}, ("step", "hours"), @@ -41,7 +41,7 @@ ( { "time_dim_mode": "raw", - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], @@ -55,7 +55,7 @@ "time_dim_mode": "forecast", "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "forecast_reference_time": [ @@ -71,7 +71,7 @@ ( { "time_dim_mode": "forecast", - "keep_dim_role_names": True, 
+ "dim_name_from_role_name": True, }, { "forecast_reference_time": [ @@ -89,7 +89,7 @@ "time_dim_mode": "valid_time", "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "valid_time": [ @@ -110,7 +110,7 @@ "time_dim_mode": "valid_time", "decode_times": True, "decode_timedelta": True, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "valid_time": [ @@ -131,7 +131,7 @@ "time_dim_mode": "raw", "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, {"date": [20240603, 20240604], "time": [0, 1200], "step_timedelta": [0, 6]}, ("step_timedelta", "hours"), @@ -139,7 +139,7 @@ ( { "time_dim_mode": "raw", - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "date": [np.datetime64("2024-06-03", "ns"), np.datetime64("2024-06-04", "ns")], @@ -171,7 +171,7 @@ def test_xr_time_basic(kwargs, dims, step_units): "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "indexing_time": [ @@ -187,7 +187,7 @@ def test_xr_time_basic(kwargs, dims, step_units): "dim_roles": {"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "indexing_time": [ @@ -203,7 +203,7 @@ def test_xr_time_basic(kwargs, dims, step_units): "dim_roles": {"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "forecast_reference_time": [ @@ -219,7 +219,7 @@ def test_xr_time_basic(kwargs, dims, step_units): "dim_roles": {"forecast_reference_time": "indexing_time", "step": "forecastMonth"}, "decode_times": False, 
"decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "forecast_reference_time": [ @@ -257,7 +257,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "dim_roles": {"step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -277,7 +277,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "dim_roles": {"step": "fcmonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -298,7 +298,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "decode_times": False, "decode_timedelta": False, "ensure_dims": ["number", "date", "time", "forecastMonth"], - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "number": [0, 1, 2], @@ -319,7 +319,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "dim_roles": {"step": "forecastMonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "number": [0, 1, 2], @@ -339,7 +339,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "dim_roles": {"step": "fcmonth"}, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "number": [0, 1, 2], @@ -360,7 +360,7 @@ def test_xr_time_seasonal_monthly_indexing_date(kwargs, dims, step_units): "decode_times": False, "decode_timedelta": False, "ensure_dims": ["number", "date", "time", "step"], - "keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "number": [0, 1, 2], @@ -402,7 +402,7 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): "add_valid_time_coord": True, "decode_times": False, "decode_timedelta": False, - 
"keep_dim_role_names": True, + "dim_name_from_role_name": True, }, { "forecast_reference_time": [ @@ -469,7 +469,7 @@ def test_xr_time_seasonal_monthly_simple(kwargs, dims, step_units): "add_valid_time_coord": True, "decode_times": False, "decode_timedelta": False, - "keep_dim_role_names": False, + "dim_name_from_role_name": False, }, { "forecast_reference_time": [ @@ -515,7 +515,7 @@ def test_xr_valid_time_coord(kwargs, dims, step_units, coords): ( { "time_dim_mode": "raw", - "keep_dim_role_names": True, + "dim_name_from_role_name": True, "ensure_dims": ["date", "time", "step"], }, { @@ -554,20 +554,22 @@ def test_xr_time_step_range_1(kwargs, dims, step_units): ( { "time_dim_mode": "raw", - "keep_dim_role_names": True, + "dim_name_from_role_name": True, "ensure_dims": ["date", "time", "step"], }, { "date": [np.datetime64("2025-05-27", "ns")], "time": [np.timedelta64(0, "ns")], - "step": [np.timedelta64(72, "h")], + "step": [np.timedelta64(72, "h"), np.timedelta64(73, "h")], }, None, ), ], ) def test_xr_time_step_range_2(kwargs, dims, step_units): - ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/date/tp_step_range.grib2")) + ds_ek = from_source( + "url", earthkit_remote_test_data_file("test-data/xr_engine/date/lsp_step_range.grib2") + ) ds = ds_ek.to_xarray(**kwargs) compare_dims(ds, dims, order_ref_var="lsp") diff --git a/tests/xr_engine/test_xr_write.py b/tests/xr_engine/test_xr_write.py index 687e039f2..0154690dc 100644 --- a/tests/xr_engine/test_xr_write.py +++ b/tests/xr_engine/test_xr_write.py @@ -211,7 +211,7 @@ def test_xr_write_seasonal(): ds = ds_ek.to_xarray( time_dim_mode="forecast", dim_roles={"date": "indexingDate", "time": "indexingTime", "step": "forecastMonth"}, - keep_dim_role_names=False, + dim_name_from_role_name=False, ) import xarray as xr From 6055e1034e63095ea60fe14d87b582a5ccd6d693 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 13:47:04 +0100 Subject: [PATCH 6/9] Handle step range --- 
docs/release_notes/version_0.15_updates.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release_notes/version_0.15_updates.rst b/docs/release_notes/version_0.15_updates.rst index 9a498bd7e..f53d1e020 100644 --- a/docs/release_notes/version_0.15_updates.rst +++ b/docs/release_notes/version_0.15_updates.rst @@ -19,7 +19,7 @@ Breaking changes - Separated the dimension names from the metadata keys used to generate the dimensions. Dimensions associated with the dimension roles are now taking the name of the :ref:`dimension role <_xr_dim_roles>`, irrespective of the metadata key the dimension role is mapped to. E.g.: the "level_type" dimension role now generates a dimension called "level_type". Previously, the dimension name was the name of the associated metadata key: e.g. it was "levtype" in the :ref:`default ` profile. The old behaviour can still be invoked by using the newly added ``dim_name_from_role_name=False`` option. See: :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. -- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datatime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that due to this change when ``dim_name_from_role_name=False`` is used the step dimension will be called "step_timedelta" instead of "step". +- The ``step`` dimension role is now mapped to the ``step_timedelta`` metadata key, which is the ``datetime.timedelta`` representation of the ``"endStep"`` GRIB/metadata key. Previously, this role was mapped to the ``"step"`` key. Please note that due to this change when ``dim_name_from_role_name=False`` is used the step dimension will be called "step_timedelta" instead of "step". 
Other changes From 0fd02e5c1c9c61228a3e001cddc288aef7fb7383 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 14:26:03 +0100 Subject: [PATCH 7/9] Handle step range --- tests/xr_engine/test_xr_ens.py | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/xr_engine/test_xr_ens.py diff --git a/tests/xr_engine/test_xr_ens.py b/tests/xr_engine/test_xr_ens.py new file mode 100644 index 000000000..10118a42d --- /dev/null +++ b/tests/xr_engine/test_xr_ens.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import os +import sys + +import pytest + +from earthkit.data import from_source +from earthkit.data.testing import earthkit_remote_test_data_file +from earthkit.data.utils.xarray.profile import PROFILE_CONF + +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from xr_engine_fixtures import compare_dims # noqa: E402 + + +@pytest.mark.cache +@pytest.mark.parametrize( + "kwargs,dims", + [ + ( + {}, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"number": "perturbationNumber"}, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"ens": "perturbationNumber"}, + "dim_name_from_role_name": True, + }, + { + "number": [0, 1, 2], + }, + ), + ( + { + "dim_roles": {"number": "perturbationNumber"}, + "dim_name_from_role_name": False, + }, + { + "perturbationNumber": [0, 1, 2], + }, + ), + ], +) +def test_xr_number_dim(kwargs, dims): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data/xr_engine/ens/ens_cf_pf.grib")) + + ds = ds_ek.to_xarray(**kwargs) + 
compare_dims(ds, dims, order_ref_var="t") From 27e6f6387d332e7f1d8bae2783631e7d0126ef91 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 14:33:53 +0100 Subject: [PATCH 8/9] Handle step range --- tests/xr_engine/test_xr_ens.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/xr_engine/test_xr_ens.py b/tests/xr_engine/test_xr_ens.py index 10118a42d..319155bf9 100644 --- a/tests/xr_engine/test_xr_ens.py +++ b/tests/xr_engine/test_xr_ens.py @@ -16,7 +16,6 @@ from earthkit.data import from_source from earthkit.data.testing import earthkit_remote_test_data_file -from earthkit.data.utils.xarray.profile import PROFILE_CONF here = os.path.dirname(__file__) sys.path.insert(0, here) From 6b5065ae08f9167f0cc0cbe31aaa6aec82b25b94 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 10 Jun 2025 14:46:35 +0100 Subject: [PATCH 9/9] Update docs --- src/earthkit/data/readers/grib/xarray.py | 70 ++++++++++++++++--- src/earthkit/data/utils/xarray/engine.py | 87 +++++++++++++----------- 2 files changed, 105 insertions(+), 52 deletions(-) diff --git a/src/earthkit/data/readers/grib/xarray.py b/src/earthkit/data/readers/grib/xarray.py index 84837d546..dcc6405d3 100644 --- a/src/earthkit/data/readers/grib/xarray.py +++ b/src/earthkit/data/readers/grib/xarray.py @@ -112,19 +112,62 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs A variable or list of variables to drop from the dataset. Default is None. * rename_variables: dict, None Mapping to rename variables. Default is None. - * extra_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be used as additional dimensions on top of the - predefined dimensions. Only enabled when no ``fixed_dims`` is specified. Default is None. + * extra_dims: str, or iterable of str, None + Define additional dimensions on top of the predefined dimensions. Only enabled when no + ``fixed_dims`` is specified. Default is None. It can be a single item or a list. 
Each + item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + .. code-block:: python + + # use key "expver" as a dimension + extra_dims = "expver" + # use keys "expver" and "stream" as a dimension + extra_dims = ["expver", "stream"] + # define dimensions "expver", "mars_stream" and "mars_type" from + # metadata keys "expver", "stream" and "type" + extra_dims = [ + "expver", + {"mars_stream": "stream"}, + ("mars_type", "type"), + ] + extra_dims = [ + { + "expver": "expver", + "mars_stream": "stream", + "mars_type": "type", + } + ] + * drop_dims: str, or iterable of str, None - Metadata key or list of metadata keys to be ignored as dimensions. Default is None. + Single or multiple dimensions to be ignored. Default is None. * ensure_dims: str, or iterable of str, None - Metadata key or list of metadata keys that should be used as dimensions even - when ``squeeze=True``. Default is None. + Dimension or dimensions that should be kept even when ``squeeze=True`` and their size + is only 1. Default is None. * fixed_dims: str, or iterable of str, None - Metadata key or list of metadata keys in the order they should be used as dimensions. When - defined no other dimensions will be used. Might be incompatible with other settings. - Default is None. + Define all the dimensions to be generated. When used no other dimensions will be created. + Might be incompatible with other settings. Default is None. It can be a single item or a list. + Each item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g.: + + .. 
code-block:: python + + # use key "step" as a dimension + fixed_dims = "step" + # use keys "step" and "levelist" as a dimension + fixed_dims = ["step", "levelist"] + # define dimensions "step", "level" and "level_type" from + # metadata keys "step", "levelist" and "levtype" + fixed_dims = [ + "step", + {"level": "levelist"}, + ("level_type", "levtype"), + ] + fixed_dims = [ + {"step": "step", "level": "levelist", "level_type": "levtype"} + ] + * dim_roles: dict, None Specify the "roles" used to form the predefined dimensions. The predefined dimensions are automatically generated when no ``fixed_dims`` specified and comprise the following @@ -137,7 +180,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs ``dim_roles`` is a mapping between the "roles" and the metadata keys representing the roles. The possible roles are as follows: - - "ens": metadata key interpreted as ensemble forecast members + - "number": metadata key interpreted as ensemble forecast members - "date": metadata key interpreted as date part of the "forecast_reference_time" - "time": metadata key interpreted as time part of the "forecast_reference_time" - "step": metadata key interpreted as forecast step @@ -153,7 +196,7 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs .. code-block:: python { - "ens": "number", + "number": "number", "date": "dataDate", "time": "dataTime", "step": "step", @@ -166,6 +209,11 @@ def to_xarray(self, engine="earthkit", xarray_open_dataset_kwargs=None, **kwargs ``dims_roles`` behaves differently to the other kwargs in the sense that it does not override but update the default values. So e.g. to change only "ens" in the defaults it is enough to specify: "dim_roles={"ens": "perturbationNumber"}. + * dim_name_from_role_name: bool, None + If True, the dimension names are formed from the role names. Otherwise the + dimension names are formed from the metadata keys specified in ``dim_roles``. 
+ Its default value (None) expands to True unless the ``profile`` overwrites it. + Only used when no ``fixed_dims`` are specified. *New in version 0.15.0*. * rename_dims: dict, None Mapping to rename dimensions. Default is None. * dims_as_attrs: str, or iterable of str, None diff --git a/src/earthkit/data/utils/xarray/engine.py b/src/earthkit/data/utils/xarray/engine.py index a73c7ba04..7a8789841 100644 --- a/src/earthkit/data/utils/xarray/engine.py +++ b/src/earthkit/data/utils/xarray/engine.py @@ -72,31 +72,31 @@ def open_dataset( rename_variables: dict, None Mapping to rename variables. Default is None. extra_dims: str, or iterable of str, None - Define additional dimensions on top of the predefined dimensions. Only enabled when no ``fixed_dims`` - is specified. Default is None. It can be a single item or a list. Each item is either a metadata key, or - a dict/tuple defining mapping between the dimension name and the metadata key. The whole option can be a - dict. E.g. - - .. code-block:: python - - # use key "expver" as a dimension - extra_dims = "expver" - # use keys "expver" and "steam" as a dimension - extra_dims = ["expver", "stream"] - # define dimensions "expver", mars_stream" and "mars_type" from - # metadata keys "expver", "stream" and "type" - extra_dims = [ - "expver", - {"mars_stream": "stream"}, - ("mars_type", "type"), - ] - extra_dims = [ - { - "expver": "expver", - "mars_stream": "stream", - "mars_type": "type", - } - ] + Define additional dimensions on top of the predefined dimensions. Only enabled when no + ``fixed_dims`` is specified. Default is None. It can be a single item or a list. Each + item is either a metadata key, or a dict/tuple defining mapping between the dimension + name and the metadata key. The whole option can be a dict. E.g. + + .. 
code-block:: python + + # use key "expver" as a dimension + extra_dims = "expver" + # use keys "expver" and "stream" as a dimension + extra_dims = ["expver", "stream"] + # define dimensions "expver", "mars_stream" and "mars_type" from + # metadata keys "expver", "stream" and "type" + extra_dims = [ + "expver", + {"mars_stream": "stream"}, + ("mars_type", "type"), + ] + extra_dims = [ + { + "expver": "expver", + "mars_stream": "stream", + "mars_type": "type", + } + ] drop_dims: str, or iterable of str, None Single or multiple dimensions to be ignored. Default is None. @@ -110,22 +110,22 @@ def open_dataset( Each item is either a metadata key, or a dict/tuple defining mapping between the dimension name and the metadata key. The whole option can be a dict. E.g. - .. code-block:: python - - # use key "step" as a dimension - fixed_dims = "step" - # use keys "step" and "levelist" as a dimension - extra_dims = ["step", "levelist"] - # define dimensions "step", level" and "level_type" from - # metadata keys "step", "levelist" and "levtype" - extra_dims = [ - "step", - {"level": "levelist"}, - ("level_type", "levtype"), - ] - extra_dims = [ - {"step": "step", "level": "levelist", "level_type": "levtype"} - ] + .. code-block:: python + + # use key "step" as a dimension + fixed_dims = "step" + # use keys "step" and "levelist" as a dimension + fixed_dims = ["step", "levelist"] + # define dimensions "step", "level" and "level_type" from + # metadata keys "step", "levelist" and "levtype" + fixed_dims = [ + "step", + {"level": "levelist"}, + ("level_type", "levtype"), + ] + fixed_dims = [ + {"step": "step", "level": "levelist", "level_type": "levtype"} + ] dim_roles: dict, None Specify the "roles" used to form the predefined dimensions. The predefined dimensions are @@ -168,6 +168,11 @@ def open_dataset( ``dims_roles`` behaves differently to the other kwargs in the sense that it does not override but update the default values. So e.g. 
to change only "ens" in the defaults it is enough to specify: "dim_roles={"number": "perturbationNumber"}. + dim_name_from_role_name: bool, None + If True, the dimension names are formed from the role names. Otherwise the + dimension names are formed from the metadata keys specified in ``dim_roles``. + Its default value (None) expands to True unless the ``profile`` overwrites it. + Only used when no ``fixed_dims`` are specified. *New in version 0.15.0*. rename_dims: dict, None Mapping to rename dimensions. Default is None. dims_as_attrs: str, or iterable of str, None