From bb9fcc5f77e6e3919c0e9204402c754a84a26f16 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 29 May 2026 16:38:33 +0200 Subject: [PATCH 01/21] "wavelength" and "chem_long_name" keys added to Parameter component of Field For GRIB data, "chem_variable" key maps to the GRIB-key "parameter.chemShortName" which does not use "unknown" value, contrary to "chemShortName" --- .../data/field/component/parameter.py | 84 ++++++++++++++++++- src/earthkit/data/field/grib/parameter.py | 25 +++++- src/earthkit/data/field/mars/parameter.py | 2 + src/earthkit/data/field/xarray/parameter.py | 2 + 4 files changed, 110 insertions(+), 3 deletions(-) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index bf7530aeb..292059952 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -39,6 +39,9 @@ class ParameterBase(SimpleFieldComponent): - "long_name": string representing the long name of the parameter variable - "units": as a string or a :class:`Units` object representing the parameter units - "chem_variable": string representing the parameter chemical variable + - "chem_long_name": string representing the long name of the parameter chemical variable + - "wavelength": int representing the optical parameter wavelength in nanometers, + or a 2-tuple of ints representing the wavelength range in nanometers - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported @@ -104,6 +107,12 @@ def chem_variable(self) -> Optional[str]: r"""Return the parameter chemical variable.""" pass + @mark_get_key + @abstractmethod + def chem_long_name(self) -> Optional[str]: + r"""Return the long name of the parameter chemical variable.""" + pass + @mark_alias("variable") def param(self) -> Optional[str]: pass @@ -127,6 +136,12 @@ def long_name(self) -> Optional[str]: """ pass + @mark_get_key + @abstractmethod + def 
wavelength(self) -> Optional[Union[int, tuple[int, int]]]: + """Return the optical parameter wavelength or wavelength interval in nanometers.""" + pass + def create_parameter(d: dict) -> "ParameterBase": """Create a ParameterBase object from a dictionary. @@ -146,7 +161,16 @@ def create_parameter(d: dict) -> "ParameterBase": cls = Parameter d1 = cls._normalise_create_kwargs( - d, allowed_keys=("variable", "units", "chem_variable", "standard_name", "long_name") + d, + allowed_keys=( + "variable", + "units", + "chem_variable", + "chem_long_name", + "standard_name", + "long_name", + "wavelength", + ), ) if "variable" not in d1: raise ValueError("Cannot create Parameter without variable") @@ -192,6 +216,20 @@ def chem_variable(self) -> None: """ return None + def chem_long_name(self) -> None: + r"""Return the long name of the parameter chemical variable. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wavelength(self) -> None: + r"""Return the optical parameter wavelength or wavelength interval in nanometers. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + @classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create an EmptyParameter object from a dictionary.""" @@ -228,9 +266,13 @@ class Parameter(ParameterBase): The parameter units, by default None. Can be provided as a string or a Units object. chem_variable : str, optional The parameter chemical variable, by default None. + wavelength : int or 2-tuple of ints, optional + The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. 
""" _chem_variable = None + _chem_long_name = None + _wavelength = None def __init__( self, @@ -239,6 +281,8 @@ def __init__( long_name: str = None, units: Union[str, "Units"] = None, chem_variable: str = None, + chem_long_name: str = None, + wavelength: Union[int, tuple[int, int]] = None, ) -> None: self._variable = variable self._standard_name = standard_name @@ -246,6 +290,10 @@ def __init__( self._units = Units.from_any(units) if chem_variable is not None: self._chem_variable = chem_variable + if chem_long_name is not None: + self._chem_long_name = chem_long_name + if wavelength is not None: + self._wavelength = wavelength def variable(self) -> Optional[str]: return self._variable @@ -262,6 +310,12 @@ def units(self) -> Optional["Units"]: def chem_variable(self) -> Optional[str]: return self._chem_variable + def chem_long_name(self) -> Optional[str]: + return self._chem_long_name + + def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: + return self._wavelength + @classmethod def from_dict(cls, d: dict) -> "Parameter": """Create a Parameter object from a dictionary. @@ -274,7 +328,13 @@ def from_dict(cls, d: dict) -> "Parameter": The dictionary can contain the following keys: - "variable": The parameter variable. + - "standard_name": The standard name of the parameter variable. + - "long_name": The long name of the parameter variable. - "units": The parameter units, as a string or a Units object. + - "chem_variable": The chemical variable of the parameter. + - "chem_long_name": The long name of the chemical variable of the parameter. + - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, + as an int or a 2-tuple of ints. 
Returns ------- @@ -290,6 +350,8 @@ def to_dict(self): "long_name": self._long_name, "units": str(self._units), "chem_variable": self._chem_variable, + "chem_long_name": self._chem_long_name, + "wavelength": self._wavelength, } def __getstate__(self): @@ -299,6 +361,8 @@ def __getstate__(self): state["long_name"] = self._long_name state["units"] = str(self._units) state["chem_variable"] = self._chem_variable + state["chem_long_name"] = self._chem_long_name + state["wavelength"] = self._wavelength return state def __setstate__(self, state): @@ -308,6 +372,8 @@ def __setstate__(self, state): long_name=state["long_name"], units=state["units"], chem_variable=state["chem_variable"], + chem_long_name=state["chem_long_name"], + wavelength=state["wavelength"], ) def set(self, *args, **kwargs): @@ -327,9 +393,21 @@ def set(self, *args, **kwargs): - "standard_name": The standard name of the parameter variable. - "long_name": The long name of the parameter variable. - "chem_variable": The chemical variable of the parameter. + - "chem_long_name": The long name of the chemical variable of the parameter. + - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers. 
""" d = self._normalise_set_kwargs( - *args, allowed_keys=("variable", "units", "chem_variable", "standard_name", "long_name"), **kwargs + *args, + allowed_keys=( + "variable", + "units", + "chem_variable", + "chem_long_name", + "standard_name", + "long_name", + "wavelength", + ), + **kwargs, ) current = { @@ -338,6 +416,8 @@ def set(self, *args, **kwargs): "long_name": self._long_name, "units": self._units, "chem_variable": self._chem_variable, + "chem_long_name": self._chem_long_name, + "wavelength": self._wavelength, } current.update(d) diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index c9061d861..5c01748f6 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -38,7 +38,28 @@ def _get(key, default=None): units = _get("units", None) - chem_name = _get("chemShortName", None) + chem_name = _get("parameter.chemShortName", None) + # using "parameter.chemShortName" instead of "chemShortName" avoids getting "unknown" if this key is not defined + # cf. 
https://github.com/ecmwf/eccodes/blob/eac2eb507b5b44fcc3d3c58e382efde3a274b1c4/definitions/grib2/parameters.def#L29 + + chem_long_name = _get("chemName", None) + if chem_long_name == "unknown": + chem_long_name = None + + _wavelength = _get("mars.wavelength", None) + # The logic below follows the "mars.wavelength" key definition: + # https://github.com/ecmwf/eccodes/blob/develop/definitions/mars/mars.wavelength.def + if isinstance(_wavelength, (int, float)): + wavelength = round(_wavelength) + elif isinstance(_wavelength, str): + # expected format is "-" + try: + wlen1, wlen2 = _wavelength.split("-") + wavelength = round(float(wlen1)), round(float(wlen2)) + except Exception: + wavelength = None + else: + wavelength = None return dict( variable=variable, @@ -46,6 +67,8 @@ def _get(key, default=None): long_name=long_name, units=units, chem_variable=chem_name, + chem_long_name=chem_long_name, + wavelength=wavelength, ) diff --git a/src/earthkit/data/field/mars/parameter.py b/src/earthkit/data/field/mars/parameter.py index 335cf9297..d9e0b8033 100644 --- a/src/earthkit/data/field/mars/parameter.py +++ b/src/earthkit/data/field/mars/parameter.py @@ -28,6 +28,8 @@ def build(request, build_empty=False): @staticmethod def _build_dict(request): + # TODO: add chem_variable and wavelength? 
+ # TODO: chem_variable would require an unaliasing table "grib-chemid.csv" param = request.get("param", None) if param is None: diff --git a/src/earthkit/data/field/xarray/parameter.py b/src/earthkit/data/field/xarray/parameter.py index 3d68e2048..1bb318bd4 100644 --- a/src/earthkit/data/field/xarray/parameter.py +++ b/src/earthkit/data/field/xarray/parameter.py @@ -35,5 +35,7 @@ def __init__(self, owner, selection=None) -> None: standard_name = owner.variable.attrs.get("standard_name", "unknown") long_name = owner.variable.attrs.get("long_name", "unknown") units = owner.variable.attrs.get("units", None) + # TODO: add "chem_variable", "wavelength" + # would need a similar mechanism to the one in the field/xarray/ensemble.py module p = Parameter.from_dict(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) super().__init__(p) From 4c115c3249f480a1e2269a8b0b05be335ad8d933 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 29 May 2026 17:44:41 +0200 Subject: [PATCH 02/21] "wave_direction" and "wave_frequency" keys added to Parameter component of Field --- .../data/field/component/parameter.py | 58 +++++++++++++++++++ src/earthkit/data/field/grib/parameter.py | 29 ++++++++++ src/earthkit/data/field/xarray/parameter.py | 2 +- 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 292059952..b8d202097 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -42,6 +42,8 @@ class ParameterBase(SimpleFieldComponent): - "chem_long_name": string representing the long name of the parameter chemical variable - "wavelength": int representing the optical parameter wavelength in nanometers, or a 2-tuple of ints representing the wavelength range in nanometers + - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None + - 
"wave_frequency": float representing the wave frequency in Hz of the 2D spectra parameter, or None - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported @@ -142,6 +144,18 @@ def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: """Return the optical parameter wavelength or wavelength interval in nanometers.""" pass + @mark_get_key + @abstractmethod + def wave_direction(self) -> Optional[float]: + """Return the wave direction in degrees of the 2D spectra parameter.""" + pass + + @mark_get_key + @abstractmethod + def wave_frequency(self) -> Optional[float]: + """Return the wave frequency in Hz of the 2D spectra parameter.""" + pass + def create_parameter(d: dict) -> "ParameterBase": """Create a ParameterBase object from a dictionary. @@ -170,6 +184,8 @@ def create_parameter(d: dict) -> "ParameterBase": "standard_name", "long_name", "wavelength", + "wave_direction", + "wave_frequency", ), ) if "variable" not in d1: @@ -230,6 +246,20 @@ def wavelength(self) -> None: """ return None + def wave_direction(self) -> None: + r"""Return the wave direction in degrees of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_frequency(self) -> None: + r"""Return the wave frequency in Hz of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. 
+ """ + return None + @classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create an EmptyParameter object from a dictionary.""" @@ -273,6 +303,8 @@ class Parameter(ParameterBase): _chem_variable = None _chem_long_name = None _wavelength = None + _wave_direction = None + _wave_frequency = None def __init__( self, @@ -283,6 +315,8 @@ def __init__( chem_variable: str = None, chem_long_name: str = None, wavelength: Union[int, tuple[int, int]] = None, + wave_direction: float = None, + wave_frequency: float = None, ) -> None: self._variable = variable self._standard_name = standard_name @@ -294,6 +328,10 @@ def __init__( self._chem_long_name = chem_long_name if wavelength is not None: self._wavelength = wavelength + if wave_direction is not None: + self._wave_direction = wave_direction + if wave_frequency is not None: + self._wave_frequency = wave_frequency def variable(self) -> Optional[str]: return self._variable @@ -316,6 +354,12 @@ def chem_long_name(self) -> Optional[str]: def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: return self._wavelength + def wave_direction(self) -> Optional[float]: + return self._wave_direction + + def wave_frequency(self) -> Optional[float]: + return self._wave_frequency + @classmethod def from_dict(cls, d: dict) -> "Parameter": """Create a Parameter object from a dictionary. @@ -335,6 +379,8 @@ def from_dict(cls, d: dict) -> "Parameter": - "chem_long_name": The long name of the chemical variable of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, as an int or a 2-tuple of ints. + - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. 
Returns ------- @@ -352,6 +398,8 @@ def to_dict(self): "chem_variable": self._chem_variable, "chem_long_name": self._chem_long_name, "wavelength": self._wavelength, + "wave_direction": self._wave_direction, + "wave_frequency": self._wave_frequency, } def __getstate__(self): @@ -363,6 +411,8 @@ def __getstate__(self): state["chem_variable"] = self._chem_variable state["chem_long_name"] = self._chem_long_name state["wavelength"] = self._wavelength + state["wave_direction"] = self._wave_direction + state["wave_frequency"] = self._wave_frequency return state def __setstate__(self, state): @@ -374,6 +424,8 @@ def __setstate__(self, state): chem_variable=state["chem_variable"], chem_long_name=state["chem_long_name"], wavelength=state["wavelength"], + wave_direction=state.get("wave_direction"), + wave_frequency=state.get("wave_frequency"), ) def set(self, *args, **kwargs): @@ -395,6 +447,8 @@ def set(self, *args, **kwargs): - "chem_variable": The chemical variable of the parameter. - "chem_long_name": The long name of the chemical variable of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers. + - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. 
""" d = self._normalise_set_kwargs( *args, @@ -406,6 +460,8 @@ def set(self, *args, **kwargs): "standard_name", "long_name", "wavelength", + "wave_direction", + "wave_frequency", ), **kwargs, ) @@ -418,6 +474,8 @@ def set(self, *args, **kwargs): "chem_variable": self._chem_variable, "chem_long_name": self._chem_long_name, "wavelength": self._wavelength, + "wave_direction": self._wave_direction, + "wave_frequency": self._wave_frequency, } current.update(d) diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index 5c01748f6..b7e3ff09a 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -61,6 +61,33 @@ def _get(key, default=None): else: wavelength = None + _grib_edition = _get("edition", None) + + def _scale_value(v, scaling_factor): + if _grib_edition == 1: + return float(v / scaling_factor) + elif _grib_edition >= 2: + return float(v * 10 ** (-scaling_factor)) + raise ValueError(f"Unsupported GRIB edition: {_grib_edition}") + + # Wave direction + try: + scaled_directions = _get("scaledDirections", None) + direction_number = _get("directionNumber", None) + direction_scaling_factor = _get("directionScalingFactor", None) + wave_direction = _scale_value(scaled_directions[direction_number - 1], direction_scaling_factor) + except Exception: + wave_direction = None + + # Wave frequency + try: + scaled_frequencies = _get("scaledFrequencies", None) + frequency_number = _get("frequencyNumber", None) + frequency_scaling_factor = _get("frequencyScalingFactor", None) + wave_frequency = _scale_value(scaled_frequencies[frequency_number - 1], frequency_scaling_factor) + except Exception: + wave_frequency = None + return dict( variable=variable, standard_name=standard_name, @@ -69,6 +96,8 @@ def _get(key, default=None): chem_variable=chem_name, chem_long_name=chem_long_name, wavelength=wavelength, + wave_direction=wave_direction, + wave_frequency=wave_frequency, ) diff --git 
a/src/earthkit/data/field/xarray/parameter.py b/src/earthkit/data/field/xarray/parameter.py index 1bb318bd4..fb26af66d 100644 --- a/src/earthkit/data/field/xarray/parameter.py +++ b/src/earthkit/data/field/xarray/parameter.py @@ -35,7 +35,7 @@ def __init__(self, owner, selection=None) -> None: standard_name = owner.variable.attrs.get("standard_name", "unknown") long_name = owner.variable.attrs.get("long_name", "unknown") units = owner.variable.attrs.get("units", None) - # TODO: add "chem_variable", "wavelength" + # TODO: add "chem_variable", "wavelength", "wave_direction", "wave_frequency" # would need a similar mechanism to the one in the field/xarray/ensemble.py module p = Parameter.from_dict(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) super().__init__(p) From ef97d5c5fcf6413f08888d152c9907f92e1eeba4 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Mon, 1 Jun 2026 08:34:01 +0200 Subject: [PATCH 03/21] Four dimensions and dim roles added: chem_variable, wavelength, wave_direction, wave_frequency --- src/earthkit/data/xr_engine/dim.py | 88 +++++++++++++++++-- .../data/xr_engine/profiles/defaults.yaml | 17 ++++ .../data/xr_engine/profiles/earthkit.yaml | 4 + .../data/xr_engine/profiles/grib.yaml | 4 + .../data/xr_engine/profiles/mars.yaml | 4 + 5 files changed, 111 insertions(+), 6 deletions(-) diff --git a/src/earthkit/data/xr_engine/dim.py b/src/earthkit/data/xr_engine/dim.py index 7d8dfb3e2..98ba3a397 100644 --- a/src/earthkit/data/xr_engine/dim.py +++ b/src/earthkit/data/xr_engine/dim.py @@ -48,6 +48,11 @@ def _get_metadata_keys(keys): _ENS_KEYS = ["member", "realisation", "realization"] ENS_KEYS = ["member"] + _get_component_keys("ensemble", _ENS_KEYS) + _get_metadata_keys(_GRIB_ENS_KEYS) +CHEM_KEYS = ["chem_variable"] + _get_component_keys("parameter", ["chem_variable"]) +WAVELENGTH_KEYS = ["wavelength"] + _get_component_keys("parameter", ["wavelength"]) +WAVE_DIRECTION_KEYS = ["wave_direction"] + 
_get_component_keys("parameter", ["wave_direction"]) +WAVE_FREQUENCY_KEYS = ["wave_frequency"] + _get_component_keys("parameter", ["wave_frequency"]) + _GRIB_LEVEL_KEYS = ["level", "levelist", "topLevel", "bottomLevel", "levels"] _VERTICAL_LEVEL_KEYS = ["level", "layer"] LEVEL_KEYS = ["level"] + _get_component_keys("vertical", _VERTICAL_LEVEL_KEYS) + _get_metadata_keys(_GRIB_LEVEL_KEYS) @@ -102,6 +107,10 @@ def _get_metadata_keys(keys): KEYS = ( ENS_KEYS, + CHEM_KEYS, + WAVELENGTH_KEYS, + WAVE_DIRECTION_KEYS, + WAVE_FREQUENCY_KEYS, LEVEL_KEYS, LEVEL_TYPE_KEYS, DATE_KEYS, @@ -295,6 +304,22 @@ class MemberDim(Dim): alias = get_keys(ENS_KEYS) +class ChemDim(Dim): + alias = get_keys(CHEM_KEYS) + + +class WavelengthDim(Dim): + alias = get_keys(WAVELENGTH_KEYS) + + +class WaveDirectionDim(Dim): + alias = get_keys(WAVE_DIRECTION_KEYS) + + +class WaveFrequencyDim(Dim): + alias = get_keys(WAVE_FREQUENCY_KEYS) + + class DateDim(Dim): name = "date" drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop="date") @@ -449,7 +474,20 @@ class OtherDim(Dim): class DimRole: - NAMES = ("member", "date", "time", "step", "level", "level_type", "forecast_reference_time", "valid_time") + NAMES = ( + "member", + "date", + "time", + "step", + "level", + "level_type", + "forecast_reference_time", + "valid_time", + "chem_variable", + "wavelength", + "wave_direction", + "wave_frequency", + ) def __init__(self, d, name_as_key=True): self.d = d @@ -564,21 +602,44 @@ class LevelPerTypeDimMode(LevelAndTypeDimMode): class DimBuilder: + name = None + dim_class = None used = {} ignored = {} + def __init__(self, profile, owner): + key, name = owner.dim_roles.role(self.name) + + dim = self.dim_class(owner, name=name, key=key) + self.used = {dim.name: dim} + def dims(self): return self.used, self.ignored class MemberDimBuilder(DimBuilder): name = "member" + dim_class = MemberDim - def __init__(self, profile, owner): - key, name = owner.dim_roles.role("member") - dim = MemberDim(owner, name=name, key=key) - 
self.used = {dim.name: dim} +class ChemDimBuilder(DimBuilder): + name = "chem_variable" + dim_class = ChemDim + + +class WavelengthDimBuilder(DimBuilder): + name = "wavelength" + dim_class = WavelengthDim + + +class WaveDirectionDimBuilder(DimBuilder): + name = "wave_direction" + dim_class = WaveDirectionDim + + +class WaveFrequencyDimBuilder(DimBuilder): + name = "wave_frequency" + dim_class = WaveFrequencyDim class TimeDimBuilder(DimBuilder): @@ -685,7 +746,18 @@ def __init__(self, profile, owner): self.ignored.update(_ignored) -DIM_BUILDERS = {v.name: v for v in [MemberDimBuilder, TimeDimBuilder, LevelDimBuilder]} +DIM_BUILDERS = { + v.name: v + for v in [ + MemberDimBuilder, + ChemDimBuilder, + WavelengthDimBuilder, + WaveDirectionDimBuilder, + WaveFrequencyDimBuilder, + TimeDimBuilder, + LevelDimBuilder, + ] +} def ensure_dim_map(d): @@ -1074,6 +1146,10 @@ def rename_dataset_dims(self, dataset): PREDEFINED_DIMS = {} for i, d in enumerate([ MemberDim, + ChemDim, + WavelengthDim, + WaveDirectionDim, + WaveFrequencyDim, ForecastRefTimeDim, DateDim, TimeDim, diff --git a/src/earthkit/data/xr_engine/profiles/defaults.yaml b/src/earthkit/data/xr_engine/profiles/defaults.yaml index d53f860f2..6b516881c 100644 --- a/src/earthkit/data/xr_engine/profiles/defaults.yaml +++ b/src/earthkit/data/xr_engine/profiles/defaults.yaml @@ -47,11 +47,28 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type + chem_variable: parameter.chem_variable + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency dim_name_from_role_name: true coord_attrs: member: standard_name: realization long_name: ensemble member id + chem_variable: + long_name: chemical or physical constituent type + wavelength: + units: nm + long_name: wavelength + wave_direction: # see: https://github.com/wavespectra/wavespectra/blob/main/wavespectra/core/attributes.yml + standard_name: sea_surface_wave_from_direction + 
units: degrees + long_name: wave direction + wave_frequency: + standard_name: sea_surface_wave_frequency + units: s-1 + long_name: wave frequency latitude: units: degrees_north standard_name: latitude diff --git a/src/earthkit/data/xr_engine/profiles/earthkit.yaml b/src/earthkit/data/xr_engine/profiles/earthkit.yaml index 4b8903acd..ac0cf42b1 100644 --- a/src/earthkit/data/xr_engine/profiles/earthkit.yaml +++ b/src/earthkit/data/xr_engine/profiles/earthkit.yaml @@ -7,6 +7,10 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type + chem_variable: parameter.chem_variable + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: parameter.variable attrs_mode: fixed variable_attrs: diff --git a/src/earthkit/data/xr_engine/profiles/grib.yaml b/src/earthkit/data/xr_engine/profiles/grib.yaml index 2273b1e6f..e88cc0206 100644 --- a/src/earthkit/data/xr_engine/profiles/grib.yaml +++ b/src/earthkit/data/xr_engine/profiles/grib.yaml @@ -7,6 +7,10 @@ dim_roles: time: metadata.time level: metadata.level level_type: metadata.typeOfLevel + chem_variable: parameter.chem_variable + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: metadata.param attrs_mode: fixed variable_attrs: diff --git a/src/earthkit/data/xr_engine/profiles/mars.yaml b/src/earthkit/data/xr_engine/profiles/mars.yaml index fb004ac56..5196c0c71 100644 --- a/src/earthkit/data/xr_engine/profiles/mars.yaml +++ b/src/earthkit/data/xr_engine/profiles/mars.yaml @@ -7,6 +7,10 @@ dim_roles: time: metadata.time level: metadata.levelist level_type: metadata.levtype + chem_variable: parameter.chem_variable + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: metadata.param attrs_mode: unique attrs: From ce495cd2c04a11fc7622f05c3c78b599596cf2e5 Mon Sep 17 
00:00:00 2001 From: Pawel Wolff Date: Mon, 1 Jun 2026 19:55:14 +0200 Subject: [PATCH 04/21] "standard_name" attr for "chem_variable" and "wavelength" coordinate variables added Docs on xr_engine dimensions updated and how-to notebooks on chem/optical/2d wave spectra dimensions added. --- docs/source/concepts/xarray/dim.rst | 50 + docs/source/how-tos/xr_engine/index.rst | 2 + .../xr_engine/xarray_engine_extra_dims.ipynb | 1503 +---------------- .../data/xr_engine/profiles/defaults.yaml | 2 + tests/xr_engine/test_xr_engine_dims.py | 58 + 5 files changed, 138 insertions(+), 1477 deletions(-) diff --git a/docs/source/concepts/xarray/dim.rst b/docs/source/concepts/xarray/dim.rst index 808dfaa22..af165950c 100644 --- a/docs/source/concepts/xarray/dim.rst +++ b/docs/source/concepts/xarray/dim.rst @@ -15,6 +15,12 @@ Predefined dimensions and dimension roles By default, the following predefined dimensions are generated, in the following order: - ensemble forecast member dimension +- aerosol type, or atmospheric chemical or physical constituent type +(discrete dimension, applicable for chemical parameters; see details :ref:`here <xr_chem_optical_dims>`) +- optical depth or wavelength range or colour channel (applicable for optical parameters, +simulated satellite images, etc.; see details :ref:`here <xr_chem_optical_dims>`) +- direction and frequency of sea waves (applicable for 2D wave spectra parameters; +see details :ref:`here <xr_wave_spectra_dims>`) - temporal dimensions, controlled by ``time_dims`` (see details :ref:`here `) - vertical dimensions, controlled by ``level_dim_mode`` (see details :ref:`here `) @@ -29,6 +35,18 @@ The predefined dimensions are based on the ``dim_roles``, which is a mapping bet * - "member" - Ensemble forecast member - "ensemble.member" + * - "chem_variable" + - Aerosol type, or chemical or physical constituent type + - "parameter.chem_variable" + * - "wavelength" + - Optical wavelength in nanometers (e.g. 
for aerosol optical depth) + - "parameter.wavelength" + * - "wave_direction" + - Wave direction in degrees (for 2D wave spectra) + - "parameter.wave_direction" + * - "wave_frequency" + - Wave frequency in Hz (for 2D wave spectra) + - "parameter.wave_frequency" * - "forecast_reference_time" - Forecast reference time (base datetime). Can be a single metadata key, or a list/tuple of two metadata keys representing the "date" and "time" parts of the forecast reference time. Alternatively, it can be a dict with "date" and "time" keys specifying the corresponding metadata keys. Used when ``"forecast_reference_time"`` is in ``time_dims``. - "time.forecast_reference_time" @@ -134,6 +152,38 @@ The following example demonstrates the vertical dimensions modes: - :ref:`/how-tos/xr_engine/xarray_engine_level.ipynb` +.. _xr_chem_optical_dims: + +Chemical and optical dimensions +------------------------------------------------ + +The following dimensions are applicable for chemical and optical parameters (see for example CAMS datasets): + +- ``"chem_variable"``: Indicates an aerosol type, chemical species, etc. (for example, for the parameter representing *mass mixing ratio*, the coordinates can be ``"CO"``, ``"O3"``, etc.). + +- ``"wavelength"`` (*nm*): Wavelength at which the optical parameter is measured, modelled or reported. + +The following notebook illustrates the use of the above dimensions in a CAMS dataset containing chemical and optical parameters: + +- :ref:`/how-tos/xr_engine/xarray_engine_chem.ipynb` + + +.. _xr_wave_spectra_dims: + +2D wave spectra dimensions +------------------------------------------------ + +The following dimensions are applicable for 2D wave spectra parameters: + +- ``"wave_direction"`` (*degree*): Direction from which the waves propagate, expressed in degrees clockwise from true north. + +- ``"wave_frequency"`` (*s-1*): Wave frequency corresponding to the spectral component. 
+ +The following notebook presents an example 2D wave spectra dataset: + +- :ref:`/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb` + + .. _xr_squeeze_and_ensure_dims: diff --git a/docs/source/how-tos/xr_engine/index.rst b/docs/source/how-tos/xr_engine/index.rst index 7ac54176f..2b1ef518c 100644 --- a/docs/source/how-tos/xr_engine/index.rst +++ b/docs/source/how-tos/xr_engine/index.rst @@ -13,6 +13,8 @@ Xarray engine xarray_engine_seasonal.ipynb xarray_engine_level.ipynb xarray_engine_ensemble.ipynb + xarray_engine_chem.ipynb + xarray_engine_wave_spectra.ipynb xarray_engine_variable_key.ipynb xarray_engine_mono_variable.ipynb xarray_engine_mono_variable_remapping.ipynb diff --git a/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb index f0ae5601f..ed14c63cc 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb @@ -11,1463 +11,26 @@ "tags": [] }, "source": [ - "# Xarray engine: extra dimensions" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "import earthkit.data as ekd" - ] - }, - { - "cell_type": "markdown", - "id": "41394fd7-4f93-4cb0-97c0-5027fe11920e", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "### 2D wave spectra example" - ] - }, - { - "cell_type": "markdown", - "id": "f1b37637-7cce-4af5-8bad-1ddb6492d732", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "We analyse a 2D wave spectra product at 2025-12-15 00 UTC and 03 UTC.\n", - "A specific feature of this dataset is that the fields are additionally\n", - "indexed by **wavelength** and **frequency**, on top of 
the standard\n", - "temporal dimension." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a59414fa-f89a-4149-948b-7d006d693fb2", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " " - ] - } - ], - "source": [ - "ds_fl = ekd.from_source(\"sample\", \"2d-wave-spectra_an.grib\").to_fieldlist()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "dae8b6e7-d697-4482-a83e-4afae1620e3b", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
metadata.directionNumbermetadata.frequencyNumber
011
121
231
341
451
.........
20833229
20843329
20853429
20863529
20873629
\n", - "

2088 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " metadata.directionNumber metadata.frequencyNumber\n", - "0 1 1\n", - "1 2 1\n", - "2 3 1\n", - "3 4 1\n", - "4 5 1\n", - "... ... ...\n", - "2083 32 29\n", - "2084 33 29\n", - "2085 34 29\n", - "2086 35 29\n", - "2087 36 29\n", - "\n", - "[2088 rows x 2 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl.ls(keys=[\"metadata.directionNumber\", \"metadata.frequencyNumber\"])" - ] - }, - { - "cell_type": "raw", - "id": "4e822233-94fc-4609-a3ce-68a2f0bbaf29", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "To represent this structure in Xarray, the predefined dimensions of the\n", - "Xarray engine must therefore be complemented with dimensions derived\n", - "from the metadata keys ``\"directionNumber\"`` and ``\"frequencyNumber\"`` when calling :py:meth:`~earthkit.data.indexing.xarray.XarrayMixIn.to_xarray`" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "34d1814b-c9f4-450f-b868-1edb5d7c6382", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 1MB\n",
-       "Dimensions:                  (directionNumber: 36, frequencyNumber: 29,\n",
-       "                              forecast_reference_time: 2, latitude: 7,\n",
-       "                              longitude: 12)\n",
-       "Coordinates:\n",
-       "  * directionNumber          (directionNumber) int64 288B 1 2 3 4 ... 34 35 36\n",
-       "  * frequencyNumber          (frequencyNumber) int64 232B 1 2 3 4 ... 27 28 29\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
-       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
-       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
-       "Data variables:\n",
-       "    2dfd                     (directionNumber, frequencyNumber, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 1MB\n", - "Dimensions: (directionNumber: 36, frequencyNumber: 29,\n", - " forecast_reference_time: 2, latitude: 7,\n", - " longitude: 12)\n", - "Coordinates:\n", - " * directionNumber (directionNumber) int64 288B 1 2 3 4 ... 34 35 36\n", - " * frequencyNumber (frequencyNumber) int64 232B 1 2 3 4 ... 27 28 29\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 202...\n", - " * latitude (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n", - " * longitude (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n", - "Data variables:\n", - " 2dfd (directionNumber, frequencyNumber, forecast_reference_time, latitude, longitude) float64 1MB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(\n", - " extra_dims=[\"metadata.directionNumber\", \"metadata.frequencyNumber\"],\n", - " add_earthkit_attrs=False,\n", - ")\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "028aeab5-8de8-43b5-adab-0446b4f3eea2", - "metadata": { - "editable": true, - "raw_mimetype": "", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "The ``extra_dims`` option also supports defining an explicit mapping\n", - "between the name of an extra dimension and the corresponding metadata\n", - "key, in a way that is conceptually similar to **dimension roles**." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ee32a7f8-933b-4447-b771-23939b0d1d4e", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 1MB\n",
-       "Dimensions:                  (d: 36, f: 29, forecast_reference_time: 2,\n",
-       "                              latitude: 7, longitude: 12)\n",
-       "Coordinates:\n",
-       "  * d                        (d) int64 288B 1 2 3 4 5 6 7 ... 31 32 33 34 35 36\n",
-       "  * f                        (f) int64 232B 1 2 3 4 5 6 7 ... 24 25 26 27 28 29\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
-       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
-       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
-       "Data variables:\n",
-       "    2dfd                     (d, f, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 1MB\n", - "Dimensions: (d: 36, f: 29, forecast_reference_time: 2,\n", - " latitude: 7, longitude: 12)\n", - "Coordinates:\n", - " * d (d) int64 288B 1 2 3 4 5 6 7 ... 31 32 33 34 35 36\n", - " * f (f) int64 232B 1 2 3 4 5 6 7 ... 24 25 26 27 28 29\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 202...\n", - " * latitude (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n", - " * longitude (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n", - "Data variables:\n", - " 2dfd (d, f, forecast_reference_time, latitude, longitude) float64 1MB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds2 = ds_fl.to_xarray(\n", - " extra_dims=[{\"d\": \"metadata.directionNumber\"}, {\"f\": \"metadata.frequencyNumber\"}],\n", - " add_earthkit_attrs=False,\n", - ")\n", - "ds2" + "# Xarray engine: extra dimensions\n", + "\n", + "Quantiles in a probabilistic forecast" ] }, { - "cell_type": "markdown", - "id": "9a8a57b1-3e2a-4a17-88ee-34eea704601a", + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", "metadata": { "editable": true, + "scrolled": true, "slideshow": { "slide_type": "" }, "tags": [] }, + "outputs": [], "source": [ - "### Quantiles in a probabilistic forecast" + "import earthkit.data as ekd" ] }, { @@ -1487,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "111d30d9-8b3b-4732-ad5c-a9004a50cd4b", "metadata": { "editable": true, @@ -1501,12 +64,12 @@ "name": "stderr", "output_type": "stream", "text": [ - " " + " " ] } ], "source": [ - "ds_fl2 = ekd.from_source(\"sample\", \"quantiles_pd.grib\").to_fieldlist()" + "fl = ekd.from_source(\"sample\", \"quantiles_pd.grib\").to_fieldlist()" ] }, { @@ -1525,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": 
"6580667e-2ca2-4f78-a929-84e1d721c853", "metadata": {}, "outputs": [ @@ -1786,13 +349,13 @@ "17 10 10 10:10 " ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds_fl2.ls(\n", + "fl.ls(\n", " keys=[\n", " \"parameter.variable\",\n", " \"time.base_datetime\",\n", @@ -1829,7 +392,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "cd3a79c9-a994-4d0a-89d1-d7a1e0d64d1b", "metadata": { "editable": true, @@ -2400,9 +963,9 @@ " 2tp (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF
    • 2tp
      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude)
      float64
      ...
      standard_name :
      unknown
      long_name :
      2 metre temperature probability
      units :
      percent
      level_type :
      surface
      [1512 values with dtype=float64]
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 13kB\n", @@ -2424,19 +987,19 @@ " institution: ECMWF" ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds3 = ds_fl2.to_xarray(\n", + "ds = fl.to_xarray(\n", " squeeze=False,\n", " extra_dims=\"metadata.quantile\",\n", " drop_dims=\"member\",\n", " add_earthkit_attrs=False,\n", ")\n", - "ds3" + "ds" ] }, { @@ -2469,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "7fcc5da4-02c5-494d-8cc6-7a34491b0491", "metadata": { "editable": true, @@ -3034,7 +1597,7 @@ " 2tp (quantile, latitude, longitude) float64 672B ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF" + " institution: ECMWF" ], "text/plain": [ " Size: 836B\n", @@ -3050,34 +1613,20 @@ " institution: ECMWF" ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds4 = ds_fl2.sel({\"metadata.quantile\": \"2:3\"}).to_xarray(\n", + "ds2 = fl.sel({\"metadata.quantile\": \"2:3\"}).to_xarray(\n", " squeeze=True,\n", " ensure_dims=\"metadata.quantile\",\n", " drop_dims=\"member\",\n", " add_earthkit_attrs=False,\n", ")\n", - "ds4" + "ds2" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "daf42911-e416-406e-a2ee-cae49e7b77a3", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/earthkit/data/xr_engine/profiles/defaults.yaml b/src/earthkit/data/xr_engine/profiles/defaults.yaml index 6b516881c..ff0e13cdd 100644 --- a/src/earthkit/data/xr_engine/profiles/defaults.yaml +++ b/src/earthkit/data/xr_engine/profiles/defaults.yaml @@ -57,8 +57,10 @@ coord_attrs: standard_name: realization long_name: ensemble member id chem_variable: + standard_name: aerosol_type_in_atmosphere_layer_in_air long_name: chemical or physical constituent type wavelength: + standard_name: 
radiation_wavelength units: nm long_name: wavelength wave_direction: # see: https://github.com/wavespectra/wavespectra/blob/main/wavespectra/core/attributes.yml diff --git a/tests/xr_engine/test_xr_engine_dims.py b/tests/xr_engine/test_xr_engine_dims.py index 0c9328b5a..bf0f676f5 100644 --- a/tests/xr_engine/test_xr_engine_dims.py +++ b/tests/xr_engine/test_xr_engine_dims.py @@ -505,6 +505,64 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, assert ds.attrs == global_attrs +@pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +@pytest.mark.parametrize( + "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + [ + ( + "wave_spectra.grib", + None, + { + "profile": "grib", + "time_dims": "valid_time", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "wave_direction": [55.0, 115.0, 175.0, 235.0, 295.0, 355.0], + "wave_frequency": [0.034523, 0.1311, 0.497852], + "member": [0], + "valid_time": [pd.Timestamp("2025-12-10 00:00:00")], + "level": [0], + "level_type": ["meanSea"], + }, + { + "wave_direction": 6, + "wave_frequency": 3, + "member": 1, + "valid_time": 1, + "level": 1, + "level_type": 1, + }, + { + "2dfd": { + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "units": "meter ** 2 * second / radian", + "typeOfLevel": "meanSea", + } + }, + {"Conventions": "CF-1.8", "institution": "ECMWF"}, + ), + ], +) +def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() + if sel: + ds0 = ds0.sel(**sel) + ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) + compare_coords(ds, coords) + compare_dims(ds, dims, sizes=True) + + for v in var_attrs: + v_attrs = dict(ds[v].attrs) + v_attrs.pop("_earthkit", None) + assert v_attrs == var_attrs[v] + assert ds.attrs == 
global_attrs + + @pytest.mark.cache @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( From 4630029c6991eb107e80d2ad7915503365cbf5eb Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Mon, 1 Jun 2026 20:00:11 +0200 Subject: [PATCH 05/21] sorting logic for coordinate of Xarray dimensions fixed in XarrayInputFieldList.unique_values: it now applies the plain `sorted()` whenever all items are either: * int/float, or * datetime.date (which includes datetime.datetime), or * datetime.time, or * datetime.timedelta. Otherwise, it falls back to `sorted(*, key=str)` --- src/earthkit/data/xr_engine/fieldlist.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/earthkit/data/xr_engine/fieldlist.py b/src/earthkit/data/xr_engine/fieldlist.py index 940b2a0d8..356755227 100644 --- a/src/earthkit/data/xr_engine/fieldlist.py +++ b/src/earthkit/data/xr_engine/fieldlist.py @@ -172,7 +172,12 @@ def unique_values(self, names): for k, v in vals.items(): v = [x for x in v if x is not None] - if all(isinstance(x, (int, datetime.timedelta)) for x in v): + if ( + all(isinstance(x, (int, float)) for x in v) + or all(isinstance(x, datetime.date) for x in v) + or all(isinstance(x, datetime.time) for x in v) + or all(isinstance(x, datetime.timedelta) for x in v) + ): vals[k] = sorted(v) else: vals[k] = sorted(v, key=str) From 319dc58b3241f316bc9f671cd3eee25b32b48037 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 2 Jun 2026 13:17:02 +0200 Subject: [PATCH 06/21] New how-to notebooks updated Tests added `chem_variable` attributes updated --- .../xr_engine/xarray_engine_chem.ipynb | 3170 +++++++++++++++++ .../xarray_engine_wave_spectra.ipynb | 1095 ++++++ .../data/xr_engine/profiles/defaults.yaml | 3 +- tests/data/chem-cams.grib | Bin 0 -> 4444 bytes tests/data/optical-cams.grib | Bin 0 -> 1565 bytes tests/data/wave_spectra.grib | Bin 0 -> 8640 bytes tests/field/test_parameter_component.py | 97 + tests/grib/test_grib_parameter.py | 68 + 8 
files changed, 4431 insertions(+), 2 deletions(-) create mode 100644 docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb create mode 100644 docs/source/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb create mode 100644 tests/data/chem-cams.grib create mode 100644 tests/data/optical-cams.grib create mode 100644 tests/data/wave_spectra.grib diff --git a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb new file mode 100644 index 000000000..420311ec4 --- /dev/null +++ b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb @@ -0,0 +1,3170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1b2c3d4", + "metadata": {}, + "source": [ + "# Xarray engine: chemical and optical dimensions\n", + "\n", + "This notebook demonstrates how earthkit-data deals with parameters which involve:\n", + "- a type of aerosol or chemical constituent (for example, CAMS atmospheric composition data),\n", + "- radiation wavelength (for example, CAMS aerosol optical depth data)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b1c2d3e4", + "metadata": {}, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "c1d2e3f4", + "metadata": {}, + "source": [ + "### Chemical variable dimension\n", + "\n", + "Parameters which involve aerosol type or chemical constituent type should have a relevant metadata exposed via\n", + "earthkit's `parameter.chem_variable` and `parameter.chem_long_name` metadata keys. 
To illustrate this, consider\n", + "the following CAMS atmospheric composition dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1e2f3a4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + } + ], + "source": [ + "fl_chem = ekd.from_source(\"sample\", \"chem-cams.grib\").to_fieldlist()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2dfee2c8-08d4-4359-b77a-5d5ec7f32f68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    parameter.variableparameter.chem_variableparameter.chem_long_name
    0mass_mixratCOCarbon monoxide
    1mass_mixratHCHOFormaldehyde
    2mass_mixratO3Ozone
    \n", + "
    " + ], + "text/plain": [ + " parameter.variable parameter.chem_variable parameter.chem_long_name\n", + "0 mass_mixrat CO Carbon monoxide\n", + "1 mass_mixrat HCHO Formaldehyde\n", + "2 mass_mixrat O3 Ozone" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_chem.ls(keys=[\"parameter.variable\", \"parameter.chem_variable\", \"parameter.chem_long_name\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a0bb00f9-4ede-4347-a297-4fcdd867be2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Field
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "\n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    number_of_values84
    array_typendarray
    array_dtypefloat64
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    variablemass_mixrat
    standard_namemass_fraction_of_carbon_monoxide_in_air
    long_nameMass mixing ratio
    unitsdimensionless
    chem_variableCO
    chem_long_nameCarbon monoxide
    wavelengthNone
    wave_directionNone
    wave_frequencyNone
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    valid_datetime2011-09-29 00:00:00
    base_datetime2011-09-29 00:00:00
    step0:00:00
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    level1
    layerNone
    level_typehybrid
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    memberNone
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    grid_specNone
    grid_typeregular_ll
    shape(7, 12)
    area(90.0, 0.0, -90.0, 330.0)
    \n", + "
    \n", + " \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + " " + ], + "text/plain": [ + "Field(mass_mixrat, 2011-09-29 00:00:00, 2011-09-29 00:00:00, 0:00:00, 1, hybrid, None, regular_ll)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_chem[0]" + ] + }, + { + "cell_type": "markdown", + "id": "843a0476-db89-4c2d-a8b8-745ed7f927b0", + "metadata": {}, + "source": [ + "The conversion into Xarray constructs the ``\"chem_variable\"`` dimension automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8cebce0e-64f2-402c-bba5-75dbfa8d99fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 2kB\n",
    +       "Dimensions:        (chem_variable: 3, latitude: 7, longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * chem_variable  (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    +       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    mass_mixrat    (chem_variable, latitude, longitude) float64 2kB ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (chem_variable: 3, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem_variable (chem_variable) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 2kB\n",
    +       "Dimensions:         (chem_variable: 3, latitude: 7, longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * chem_variable   (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    +       "    chem_long_name  (chem_variable) <U15 180B ...\n",
    +       "  * latitude        (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude       (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    mass_mixrat     (chem_variable, latitude, longitude) float64 2kB ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (chem_variable: 3, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem_variable (chem_variable) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'chem_long_name' (chem_variable: 3)> Size: 180B\n",
    +       "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='<U15')\n",
    +       "Coordinates:\n",
    +       "  * chem_variable   (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    +       "    chem_long_name  (chem_variable) <U15 180B 'Carbon monoxide' ... 'Ozone'
    " + ], + "text/plain": [ + " Size: 180B\n", + "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    parameter.variableparameter.long_nameparameter.chem_variableparameter.chem_long_nameparameter.wavelength
    0aodAerosol optical depthaer_totalTotal aerosol550
    1aodAerosol optical depthaer_totalTotal aerosol800
    2aodAerosol optical depthaer_smAerosol small mode550
    3aodAerosol optical depthaer_smAerosol small mode800
    \n", + "" + ], + "text/plain": [ + " parameter.variable parameter.long_name parameter.chem_variable \\\n", + "0 aod Aerosol optical depth aer_total \n", + "1 aod Aerosol optical depth aer_total \n", + "2 aod Aerosol optical depth aer_sm \n", + "3 aod Aerosol optical depth aer_sm \n", + "\n", + " parameter.chem_long_name parameter.wavelength \n", + "0 Total aerosol 550 \n", + "1 Total aerosol 800 \n", + "2 Aerosol small mode 550 \n", + "3 Aerosol small mode 800 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_opt = ekd.from_source(\"sample\", \"optical-cams.grib\").to_fieldlist()\n", + "fl_opt.ls(\n", + " keys=[\n", + " \"parameter.variable\",\n", + " \"parameter.long_name\",\n", + " \"parameter.chem_variable\",\n", + " \"parameter.chem_long_name\",\n", + " \"parameter.wavelength\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9609a38a-30c6-41ea-b24d-8432b2e72dae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Field
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "\n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    number_of_values84
    array_typendarray
    array_dtypefloat64
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    variableaod
    standard_nameunknown
    long_nameAerosol optical depth
    unitsNumeric
    chem_variableaer_total
    chem_long_nameTotal aerosol
    wavelength550
    wave_directionNone
    wave_frequencyNone
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    valid_datetime2011-09-29 00:00:00
    base_datetime2011-09-29 00:00:00
    step0:00:00
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    level0
    layerNone
    level_typesurface
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    memberNone
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    grid_specNone
    grid_typeregular_ll
    shape(7, 12)
    area(90.0, 0.0, -90.0, 330.0)
    \n", + "
    \n", + " \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + " " + ], + "text/plain": [ + "Field(aod, 2011-09-29 00:00:00, 2011-09-29 00:00:00, 0:00:00, 0, surface, None, regular_ll)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_opt[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "k1l2m3n4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 3kB\n",
    +       "Dimensions:        (chem_variable: 2, wavelength: 2, latitude: 7, longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * chem_variable  (chem_variable) <U9 72B 'aer_sm' 'aer_total'\n",
    +       "  * wavelength     (wavelength) int64 16B 550 800\n",
    +       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    aod            (chem_variable, wavelength, latitude, longitude) float64 3kB ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 3kB\n", + "Dimensions: (chem_variable: 2, wavelength: 2, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem_variable (chem_variable) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typeparameter.wave_directionparameter.wave_frequency
    02dfd2025-12-15 00:00:002025-12-15 00:00:000 days0mean_sea0regular_ll5.00.034523
    12dfd2025-12-15 00:00:002025-12-15 00:00:000 days0mean_sea0regular_ll15.00.034523
    20862dfd2025-12-15 03:00:002025-12-15 03:00:000 days0mean_sea0regular_ll345.00.497852
    20872dfd2025-12-15 03:00:002025-12-15 03:00:000 days0mean_sea0regular_ll355.00.497852
    \n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2dfd 2025-12-15 00:00:00 2025-12-15 00:00:00 0 days \n", + "1 2dfd 2025-12-15 00:00:00 2025-12-15 00:00:00 0 days \n", + "2086 2dfd 2025-12-15 03:00:00 2025-12-15 03:00:00 0 days \n", + "2087 2dfd 2025-12-15 03:00:00 2025-12-15 03:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 0 mean_sea 0 regular_ll \n", + "1 0 mean_sea 0 regular_ll \n", + "2086 0 mean_sea 0 regular_ll \n", + "2087 0 mean_sea 0 regular_ll \n", + "\n", + " parameter.wave_direction parameter.wave_frequency \n", + "0 5.0 0.034523 \n", + "1 15.0 0.034523 \n", + "2086 345.0 0.497852 \n", + "2087 355.0 0.497852 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl.ls(extra_keys=[\"parameter.wave_direction\", \"parameter.wave_frequency\"]).iloc[[0, 1, -2, -1]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "353f2463-1086-43d7-8a74-3140599a1860", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Field
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "\n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    number_of_values84
    array_typendarray
    array_dtypefloat64
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    variable2dfd
    standard_nameunknown
    long_name2D wave spectra (single)
    unitsmeter ** 2 * second / radian
    chem_variableNone
    chem_long_nameNone
    wavelengthNone
    wave_direction5.0
    wave_frequency0.034523
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    valid_datetime2025-12-15 00:00:00
    base_datetime2025-12-15 00:00:00
    step0:00:00
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    level0
    layerNone
    level_typemean_sea
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    member0
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    grid_specNone
    grid_typeregular_ll
    shape(7, 12)
    area(90.0, 0.0, -90.0, 330.0)
    \n", + "
    \n", + " \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + " " + ], + "text/plain": [ + "Field(2dfd, 2025-12-15 00:00:00, 2025-12-15 00:00:00, 0:00:00, 0, mean_sea, 0, regular_ll)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl[0]" + ] + }, + { + "cell_type": "markdown", + "id": "m1n2o3p4", + "metadata": {}, + "source": [ + "Conversion to Xarray is straightforward:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "n1o2p3q4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 1MB\n",
    +       "Dimensions:                  (member: 1, wave_direction: 36,\n",
    +       "                              wave_frequency: 29, forecast_reference_time: 2,\n",
    +       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
    +       "                              longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * member                   (member) <U1 4B '0'\n",
    +       "  * wave_direction           (wave_direction) float64 288B 5.0 15.0 ... 355.0\n",
    +       "  * wave_frequency           (wave_frequency) float64 232B 0.03452 ... 0.4979\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
    +       "  * step                     (step) timedelta64[ns] 8B 00:00:00\n",
    +       "  * level                    (level) int64 8B 0\n",
    +       "  * level_type               (level_type) <U8 32B 'mean_sea'\n",
    +       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
    +       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    2dfd                     (member, wave_direction, wave_frequency, forecast_reference_time, step, level, level_type, latitude, longitude) float64 1MB ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 1MB\n", + "Dimensions: (member: 1, wave_direction: 36,\n", + " wave_frequency: 29, forecast_reference_time: 2,\n", + " step: 1, level: 1, level_type: 1, latitude: 7,\n", + " longitude: 12)\n", + "Coordinates:\n", + " * member (member) 9Ki2PO|NaHGCfFX=z-m`iAIl^-<`+r-r792m?eY~daOrfR9j>uL~O05 zLYc8vwMtZ?JYurmp-3Jfd2TFf5R#hxjom+V+J7DA?CE>Xz4v>+-|z4IzUSQU{oJq5 zjZ>iiU z5dcP)0H+NRWcjBHnv%{72fxY`PTzM~D8FkZJabnt5fuD9vcSsN{T^ z=-SoWVx5IA#CAUG#Bk~!ap*gycm^%b=H+eJBJv2f^?8aN=dQypR=aNjkb+jYSu8`u)^UrX`(n!3qqzR|4 z+>2MMgK^S8SG=_REDk?ck7q5LfCK8xF;W{~&yFDM(&2%J8B}0PY>JI)>XBwJKu=;+ z=<095NOe+xN*gn$}&a+(F?ZtMV<`v#8-#BV3U-^43zoKq9 zzqUM`|1fA1zo&UO!Tj|RVRNC5aDLN75RSowe=s0Ii@k_=Rz0!y%r0W{pl5^vD2Sri z9-?y5K;mRABCaI8BwD2P#EYB{#D|9Ykdb-~8Y=;0?;HfJQ}@E5e)@2Dvp*a~DB;-o zuFz|uC4?`FA)4F`{T4>T@A)-w${7|6G}r=zbPmBehpxg<7YDdt&OI0*DTWJKv2byE zHH^CL3}ZSTKxxZj7_0Dr@oon2Cq*?(c+?4(8@0m~tVwXi%OaRmwH&UTR|;hxYh)0E zS|s3W_kw&onLPlefW-k?m!6&$H^ut(z@%rljjwCjHEP>S^iteub8eJ0UF>9}p6Fsb zWK?pJYQK(EgMTJluWxg_kZ|cIW>89fcq32J`gm7zDJwd9_t96haA8*C zfOz>JX?=d~fZ2>RfvrjT`LN~+X~al{qr82YaZXjU&ysbE7I4`9DZoynxo>W`FX8y& z26~mzg^IpO^tSv5$Q!7LF9dR`X;)G&QGX-e--!1&;-6g9evp4QkdKaJ7|ERafc%*L z4_Z8Rv@vz4yz438$zwLH9+JDZO()v+!-2>p0o!%$6w@W+ wv(~h3$Qzbnm*~VuxDw8x85z*WXf)l{4SPzxu7%xkY^p76?UP7fgBIz30~*7+WB>pF literal 0 HcmV?d00001 diff --git a/tests/data/optical-cams.grib b/tests/data/optical-cams.grib new file mode 100644 index 0000000000000000000000000000000000000000..796ad975d4f8180321a8d75687456525f3923ae3 GIT binary patch literal 1565 zcmZ<{@^t$DpMi-13>e#hlqe%Z5(9%W10(xwPFavJNK%lAfssLlfr*7NF)`T!DB!^i zk_rJ4Z2wUKP>2VJ*+Ft38P-^@1`ykz703`?oz}p3@-P^K1g%&=G9qc83_v`HhX350 zKwH(orZW7W52P7EQjA~$2m$0uvI2<^1}&RAZ;~1W7#IW?_!Jl#fMRTH|AFLc_6zOu z|2_$=6KQ1O1PP;nC!e!<%>Kqa&scRgo4wCluSKnC)g|t^yI+f&1Jt*1tOGU~q6y06KK`ChZG~E+!Vi^BPcWN9LFVfjep}^Z?}tNh~mfya7xggkzyW z!>K_KBNjHYKRCGl|I2u;7FWz5INodR9pZm;`r~JdrXPMDw@R{XvD!D`wI;UV?K&U6 zd|T+(#I{O6sJJ6w^S{Nbx9*qpkh9uUb^qqf*B7T8%PM_p-cWJcEPArauPk%9GuvM* 
zscY)fFU_2%^5AIJ-h9iS2Qw5!7!*3>fzIz%;BY^m?;cjVIs!T9G1yq*q6HBbjP%VB z|DQnI2MT`@bHpUmW-K{kCHsZR`v0HkzVK*bI)LVPrr1ShYmLRF_fFlTc-A^$fw0oj z`-dVo9EtH%J@@5w^FqeW0`fi%Z~rgycDU`Q_j#v{LB?P0YYkDK#P1x1Ga|NkHRs>;L5);KHbu>+d*NL(y& z&<>P_=7)if1LE_8+_o=R^1~YT3w!SWzsS0mqeFn@0Ft*sOvXjZ>K9wST2>s7wp}*$ zxrXgxy9imp6#h`EJ?k|2|Ecl?x zfRKWvA{#KlRjd>pF323BQ9(x(U2qz8!Nfl| z=tfm=44o3U7zZk#Mkk#caXP6}WNz66o&WY?Y-%J!jlR(70NK$S5s8~w$-P)|PtxmM z+Dnt?)9>$o&*R9LlnhcwK=MlFE$VLQm=72+f^hzoeR0+?F^xaxreO~Cn!Kvx?`=zWkh4rG}gWLpG?Cmy6N6_hp$ z)Ju7wW*2~RxOnndV-sk{MbL}eK!0=}^idAW8 z&I9v90hqfksDdh>notAPBJua%{~6dk9jJ-|+-rc7OMy$#z^&^b>hC~C1cD@WfXqGx zB6vWm$AUC(1ce|_V=_R!RSl|kJE)tdK*t{ky&)0wwYy*@HGuiD7%Hm+szbw|cEm${ z{SMfQBY+X&L}|d1>@snZgm<&cRD8e+>k8}NQx>S{KX)1qu!n2GBTgLsKmGS>hwr`` z#rOZ>`?)Xq4ODz(^%X0qH2k%IC%n6h7cc*7f~EXk_QhAP_{!$Xg{#M`cK_1h1&(C| z-jj7Mo?o1zn_3&VCl)34%4bqH&os{4m{OJAR5UY|DoxowwR-2m-13s~rc&FZf58BJ zhbzAF`O@rI(YI6b_3QCXKEP)u?D+AD-O!~LpnuQBJRfk_IllPY`E?X`+nH=U;~LW} ztX~stu=Po~=enS;s`Fuh4YDQ$0czumpLE}k(Fpe)s)cLbTM6lit@b_n(qyafk ztJsoni!Z)vtlN$S;%S=< zbEXWUg~4yOeo|PPwyPqIK28evM?5*nlJ7-dd;=9<*?c*cb*~J$Ao<3qMc=SvH~EDN z_URU=jQh?ne09q=C$8`~UiiZ3b;cZ0aC*7j(0sZ@sJLIR7anz2z4<-wi?3esmCrYv zjalj5de2*Ct(@p@P|#g34U|> z)#w4_cevs!pKk~oxw>k)^!$y{ioTJ@y7;r>EL+VXO^>6fnhy1;u z5*yc;P`iG&&RUVW?9Y>HI#+#ddxEO>Ht#|5^=bX9QG8{~FT$8A8{M$LYh+Z5zM};2 zZ!BTb+03jv&hgoijjigl`CE$2k9AXzTzk(X-|v0NZ=m8UpKrMOV2-Is>aRxf0_U;h zfVZR==KoRnY0w^yz~tO?@U=q-HCS+aJNJ(n9)Pc2@s-V2+6d6JL!YW{DbkHSyzyd;a}EwKcn}#+xz|(Vxv2L literal 0 HcmV?d00001 diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index bb7412773..bfff8493a 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -21,6 +21,11 @@ def test_parameter_component_alias_1(): assert r.units() == "K" assert r.standard_name() is None assert r.long_name() is None + assert r.chem_variable() is None + assert r.chem_long_name() is None + assert r.wavelength() is None + assert r.wave_direction() is None + assert 
r.wave_frequency() is None @pytest.mark.parametrize( @@ -40,6 +45,31 @@ def test_parameter_component_alias_1(): "long_name": "Temperature", }, ), + ( + { + "variable": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem_variable": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wave_direction": None, + "wave_frequency": None, + }, + { + "variable": "aod", + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem_variable": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wave_direction": None, + "wave_frequency": None, + }, + ), ], ) def test_parameter_component_from_dict_ok(input_d, ref): @@ -56,6 +86,11 @@ def test_parameter_component_from_dict_ok(input_d, ref): assert r.units() == ref["units"] assert r.standard_name() == ref["standard_name"] assert r.long_name() == ref["long_name"] + assert r.chem_variable() == ref.get("chem_variable", None) + assert r.chem_long_name() == ref.get("chem_long_name", None) + assert r.wavelength() == ref.get("wavelength", None) + assert r.wave_direction() == ref.get("wave_direction", None) + assert r.wave_frequency() == ref.get("wave_frequency", None) @pytest.mark.parametrize( @@ -76,6 +111,62 @@ def test_parameter_component_from_dict_ok(input_d, ref): "variable": "t", "param": "t", "units": "K", + "standard_name": None, + "long_name": None, + "chem_variable": None, + "chem_long_name": None, + "wavelength": None, + "wave_direction": None, + "wave_frequency": None, + }, + ), + ( + [ + { + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem_variable": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + } + ], + { + "variable": "aod", + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem_variable": "aer_total", + 
"chem_long_name": "Total aerosol", + "wavelength": 550, + "wave_direction": None, + "wave_frequency": None, + }, + ), + ( + [ + { + "variable": "2dfd", + "units": "meter ** 2 * second / radian", + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "wave_direction": 5.0, + "wave_frequency": 0.034523, + } + ], + { + "variable": "2dfd", + "param": "2dfd", + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "units": "meter ** 2 * second / radian", + "chem_variable": None, + "chem_long_name": None, + "wavelength": None, + "wave_direction": 5.0, + "wave_frequency": 0.034523, }, ), ], @@ -97,3 +188,9 @@ def test_parameter_component_set(input_d, ref): # the original object is unchanged assert r.variable() == "p" assert r.units() == "Pa" + + +def test_parameter_component_wavelength_tuple(): + """Test wavelength as a tuple (wavelength range).""" + p = Parameter(variable="aod", wavelength=(400, 700)) + assert p.wavelength() == (400, 700) diff --git a/tests/grib/test_grib_parameter.py b/tests/grib/test_grib_parameter.py index 55b50eff4..437cd56a1 100644 --- a/tests/grib/test_grib_parameter.py +++ b/tests/grib/test_grib_parameter.py @@ -65,3 +65,71 @@ def test_grib_parameter_chem(fl_type): assert f.parameter.param() == "tcvimd" assert f.parameter.chem_variable() == "CO" assert f.parameter.units() == "kg m**-2" + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_chem_long_name(fl_type): + """Test chem_long_name extraction from CAMS chemistry GRIB2 data.""" + ds, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + + expected = [ + ("mass_mixrat", "CO", "Carbon monoxide"), + ("mass_mixrat", "HCHO", "Formaldehyde"), + ("mass_mixrat", "O3", "Ozone"), + ] + + assert len(ds) == 3 + for i, (var, chem, chem_long) in enumerate(expected): + f = ds[i] + assert f.parameter.variable() == var + assert f.parameter.param() == var + assert f.parameter.units() == "dimensionless" + assert f.parameter.chem_variable() == 
chem + assert f.parameter.chem_long_name() == chem_long + assert f.parameter.wavelength() is None + assert f.parameter.wave_direction() is None + assert f.parameter.wave_frequency() is None + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wavelength(fl_type): + """Test wavelength extraction from CAMS optical GRIB2 data.""" + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + + assert len(ds) == 4 + # All fields have aod variable with wavelength 550 or 800 + for f in ds: + assert f.parameter.wavelength() in (550, 800) + assert isinstance(f.parameter.wavelength(), int) + + result = ds.unique("parameter.wavelength") + assert set(result["parameter.wavelength"]) == {550, 800} + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction(fl_type): + """Test wave_direction extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + assert len(ds) == 18 + # All fields should have non-None wave_direction + for f in ds: + assert f.parameter.wave_direction() is not None + assert isinstance(f.parameter.wave_direction(), float) + + result = ds.unique("parameter.wave_direction") + assert set(result["parameter.wave_direction"]) == {55.0, 115.0, 175.0, 235.0, 295.0, 355.0} + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_frequency(fl_type): + """Test wave_frequency extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + result = ds.unique("parameter.wave_frequency") + freqs = result["parameter.wave_frequency"] + assert len(freqs) == 3 + # Check approximate values + assert abs(freqs[0] - 0.034523) < 0.001 + assert abs(freqs[1] - 0.1311) < 0.001 + assert abs(freqs[2] - 0.497852) < 0.001 From 1f42de38c1be9de588d1ec562d536c982ec702a8 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 2 Jun 2026 15:53:56 +0200 Subject: [PATCH 07/21] Docstring for `Parameter` 
class updated How-to notebook on extra_dims updated xr_engine tests for new dimensions added --- .../xr_engine/xarray_engine_extra_dims.ipynb | 6 +- .../data/field/component/parameter.py | 16 +- tests/xr_engine/test_xr_engine_dims.py | 296 +++++++++++++++--- 3 files changed, 267 insertions(+), 51 deletions(-) diff --git a/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb index ed14c63cc..c93aaf18f 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_extra_dims.ipynb @@ -963,9 +963,9 @@ " 2tp (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF
    • 2tp
      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude)
      float64
      ...
      standard_name :
      unknown
      long_name :
      2 metre temperature probability
      units :
      percent
      level_type :
      surface
      [1512 values with dtype=float64]
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 13kB\n", @@ -1597,7 +1597,7 @@ " 2tp (quantile, latitude, longitude) float64 672B ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF" + " institution: ECMWF" ], "text/plain": [ " Size: 836B\n", diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index b8d202097..808e282ed 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -38,10 +38,10 @@ class ParameterBase(SimpleFieldComponent): on the CF standard name - "long_name": string representing the long name of the parameter variable - "units": as a string or a :class:`Units` object representing the parameter units - - "chem_variable": string representing the parameter chemical variable - - "chem_long_name": string representing the long name of the parameter chemical variable + - "chem_variable": string representing the parameter chemical variable, or None + - "chem_long_name": string representing the long name of the parameter chemical variable, or None - "wavelength": int representing the optical parameter wavelength in nanometers, - or a 2-tuple of ints representing the wavelength range in nanometers + or a 2-tuple of ints representing the wavelength range in nanometers, or None - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None - "wave_frequency": float representing the wave frequency in Hz of the 2D spectra parameter, or None - "param": alias of "variable" @@ -292,12 +292,22 @@ class Parameter(ParameterBase): ---------- variable : str, optional The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. 
chem_variable : str, optional The parameter chemical variable, by default None. + chem_long_name : str, optional + The long name of the parameter chemical variable, by default None. wavelength : int or 2-tuple of ints, optional The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. + wave_direction : float, optional + The wave direction in degrees of the 2D spectra parameter, by default None. + wave_frequency : float, optional + The wave frequency in Hz of the 2D spectra parameter, by default None. """ _chem_variable = None diff --git a/tests/xr_engine/test_xr_engine_dims.py b/tests/xr_engine/test_xr_engine_dims.py index bf0f676f5..7cda5e8a0 100644 --- a/tests/xr_engine/test_xr_engine_dims.py +++ b/tests/xr_engine/test_xr_engine_dims.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import datetime # (C) Copyright 2020 ECMWF. # @@ -8,8 +9,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
# - - import pandas as pd import pytest from xr_engine_fixtures import ( @@ -22,6 +21,96 @@ from earthkit.data.utils.testing import earthkit_remote_test_data_file +@pytest.fixture(scope="module") +def pl_grib_source(): + print("HAHA pl.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")) + + +@pytest.fixture(scope="module") +def pl_small_grib_source(): + print("HAHA pl_small.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")) + + +@pytest.fixture(scope="module") +def pl_sfc_grib_source(): + print("HAHA pl_sfc.grib1") + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_sfc.grib1")) + + +@pytest.fixture(scope="module") +def quantiles_pd_grib_source(): + print("HAHA quantiles_pd.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/ens/quantiles_pd.grib")) + + +@pytest.fixture(scope="module") +def wave_spectra_grib_source(): + print("HAHA wave_spectra.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/wave_spectra.grib")) + + +@pytest.fixture(scope="module") +def aifs_sfc_grib_source(): + print("HAHA aifs-sfc.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-sfc.grib")) + + +@pytest.fixture(scope="module") +def aifs_pl_sfc_grib_source(): + print("HAHA aifs-sfc.grib") + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-pl_sfc.grib")) + + +@pytest.fixture(scope="module") +def chem_cams_grib_source(): + print("HAHA chem-cams.grib") + return from_source("sample", "chem-cams.grib") + + +@pytest.fixture(scope="module") +def optical_cams_grib_source(): + print("HAHA chem-cams.grib") + return from_source("sample", "optical-cams.grib") + + +@pytest.fixture(scope="module") +def source( + request, + pl_grib_source, + pl_small_grib_source, + pl_sfc_grib_source, + quantiles_pd_grib_source, + wave_spectra_grib_source, + aifs_sfc_grib_source, + 
aifs_pl_sfc_grib_source, + chem_cams_grib_source, + optical_cams_grib_source, +): + print(f"HAHA...: {request.param}") + if request.param == "pl.grib": + return pl_grib_source + elif request.param == "pl_small.grib": + return pl_small_grib_source + elif request.param == "pl_sfc.grib1": + return pl_sfc_grib_source + elif request.param == "quantiles_pd.grib": + return quantiles_pd_grib_source + elif request.param == "wave_spectra.grib": + return wave_spectra_grib_source + elif request.param == "aifs-sfc.grib": + return aifs_sfc_grib_source + elif request.param == "aifs-pl_sfc.grib": + return aifs_pl_sfc_grib_source + elif request.param == "chem-cams.grib": + return chem_cams_grib_source + elif request.param == "optical-cams.grib": + return optical_cams_grib_source + else: + raise ValueError(f"No fixture for {request.param}") + + @pytest.mark.cache @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @@ -47,8 +136,8 @@ ), ], ) -def test_xr_rename_dims(allow_holes, lazy_load, kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_rename_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, lazy_load=lazy_load, **kwargs) num = len(ds) @@ -111,8 +200,8 @@ def test_xr_rename_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_fixed_dims(allow_holes, lazy_load, kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_fixed_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, lazy_load=lazy_load, **kwargs) num = len(ds) @@ -171,8 +260,8 @@ def test_xr_fixed_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_drop_dims(allow_holes, lazy_load, 
kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_drop_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, lazy_load=lazy_load, **kwargs) num = len(ds) @@ -187,10 +276,9 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", { "metadata.shortName": "t", "metadata.dataDate": 20240603, @@ -228,7 +316,6 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", { "metadata.shortName": "t", "metadata.dataDate": 20240603, @@ -263,8 +350,10 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_ensure_dims( + pl_small_grib_source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs +): + ds0 = pl_small_grib_source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) @@ -282,10 +371,10 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -331,7 +420,7 @@ def 
test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -369,7 +458,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -408,7 +497,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "ens/quantiles_pd.grib", + "quantiles_pd.grib", None, { "profile": "grib", @@ -459,6 +548,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, "profile": "grib", "time_dims": "valid_time", "extra_dims": ["metadata.directionNumber", "metadata.frequencyNumber"], + "drop_dims": ["wave_direction", "wave_frequency"], "squeeze": False, "add_earthkit_attrs": False, }, @@ -489,9 +579,10 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ], + indirect=["source"], ) -def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_extra_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) @@ -509,7 +600,7 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( "wave_spectra.grib", @@ -542,14 
+633,126 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, "long_name": "2D wave spectra (single)", "units": "meter ** 2 * second / radian", "typeOfLevel": "meanSea", - } + }, + "wave_direction": { + "standard_name": "sea_surface_wave_from_direction", + "long_name": "wave direction", + "units": "degrees", + }, + "wave_frequency": { + "standard_name": "sea_surface_wave_frequency", + "long_name": "wave frequency", + "units": "s-1", + }, + }, + {"Conventions": "CF-1.8", "institution": "ECMWF"}, + ), + ], + indirect=["source"], +) +def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() + if sel: + ds0 = ds0.sel(**sel) + ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) + compare_coords(ds, coords) + compare_dims(ds, dims, sizes=True) + + for v in var_attrs: + v_attrs = dict(ds[v].attrs) + v_attrs.pop("_earthkit", None) + assert v_attrs == var_attrs[v] + assert ds.attrs == global_attrs + + +@pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +@pytest.mark.parametrize( + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", + [ + ( + "chem-cams.grib", + None, + { + "profile": "earthkit", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "forecast_reference_time": [datetime.datetime(2011, 9, 29, 0, 0)], + "step": [datetime.timedelta(0)], + "level": [1], + "level_type": ["hybrid"], + "chem_variable": ["CO", "HCHO", "O3"], + }, + { + "forecast_reference_time": 1, + "step": 1, + "level": 1, + "level_type": 1, + "chem_variable": 3, + }, + { + "mass_mixrat": { + "standard_name": "mass_fraction_of_carbon_monoxide_in_air", + "long_name": "Mass mixing ratio", + "units": "dimensionless", + "level_type": "hybrid", + }, + "chem_variable": { + "long_name": "atmospheric chemical or physical constituent type", + }, + }, + {"Conventions": "CF-1.8", 
"institution": "ECMWF"}, + ), + ( + "optical-cams.grib", + None, + { + "profile": "earthkit", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "forecast_reference_time": [datetime.datetime(2011, 9, 29, 0, 0)], + "step": [datetime.timedelta(0)], + "level": [0], + "level_type": ["surface"], + "chem_variable": ["aer_sm", "aer_total"], + "wavelength": [550, 800], + }, + { + "forecast_reference_time": 1, + "step": 1, + "level": 1, + "level_type": 1, + "chem_variable": 2, + "wavelength": 2, + }, + { + "aod": { + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "level_type": "surface", + }, + "chem_variable": { + "long_name": "atmospheric chemical or physical constituent type", + }, + "wavelength": { + "standard_name": "radiation_wavelength", + "long_name": "wavelength", + "units": "nm", + }, }, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ], + indirect=["source"], ) -def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_chem_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) @@ -566,10 +769,10 @@ def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, @pytest.mark.cache @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -596,7 +799,7 @@ def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, {}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", None, { "profile": "grib", @@ -633,7 
+836,7 @@ def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, {"gridType": "regular_ll"}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -666,7 +869,7 @@ def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, {}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -704,9 +907,10 @@ def test_xr_rare_builtin_dims(allow_holes, lazy_load, path, sel, kwargs, coords, {}, ), ], + indirect=["source"], ) -def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_engine_level_per_type_dim(source, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, **kwargs) @@ -724,10 +928,10 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", + "pl_small.grib", { "metadata.shortName": ["t", "r"], "metadata.dataDate": 20240603, @@ -765,7 +969,7 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", { "metadata.shortName": ["t", "r"], "metadata.dataDate": 20240603, @@ -805,11 +1009,12 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ], + indirect=["source"], ) def test_xr_engine_dims_as_attrs_1( - allow_holes, lazy_load, path, sel, idx, 
kwargs, coords, dims, var_attrs, global_attrs + source, allow_holes, lazy_load, sel, idx, kwargs, coords, dims, var_attrs, global_attrs ): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) if idx: @@ -828,10 +1033,10 @@ def test_xr_engine_dims_as_attrs_1( @pytest.mark.cache @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -858,7 +1063,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -887,7 +1092,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -914,7 +1119,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -943,7 +1148,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -970,7 +1175,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -999,9 +1204,10 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ], + indirect=["source"], ) -def test_xr_engine_dims_as_attrs2(lazy_load, path, sel, idx, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_engine_dims_as_attrs2(source, lazy_load, sel, idx, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) if idx: From d59412a8965b4a2cb1c575285340bd1b82ba9d5d Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 2 Jun 2026 16:01:35 +0200 Subject: [PATCH 08/21] Cleanup --- tests/xr_engine/test_xr_engine_dims.py | 10 ---------- 1 file changed, 10 
deletions(-) diff --git a/tests/xr_engine/test_xr_engine_dims.py b/tests/xr_engine/test_xr_engine_dims.py index 7cda5e8a0..6f234fb0c 100644 --- a/tests/xr_engine/test_xr_engine_dims.py +++ b/tests/xr_engine/test_xr_engine_dims.py @@ -23,55 +23,46 @@ @pytest.fixture(scope="module") def pl_grib_source(): - print("HAHA pl.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")) @pytest.fixture(scope="module") def pl_small_grib_source(): - print("HAHA pl_small.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")) @pytest.fixture(scope="module") def pl_sfc_grib_source(): - print("HAHA pl_sfc.grib1") return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_sfc.grib1")) @pytest.fixture(scope="module") def quantiles_pd_grib_source(): - print("HAHA quantiles_pd.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/ens/quantiles_pd.grib")) @pytest.fixture(scope="module") def wave_spectra_grib_source(): - print("HAHA wave_spectra.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/wave_spectra.grib")) @pytest.fixture(scope="module") def aifs_sfc_grib_source(): - print("HAHA aifs-sfc.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-sfc.grib")) @pytest.fixture(scope="module") def aifs_pl_sfc_grib_source(): - print("HAHA aifs-sfc.grib") return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-pl_sfc.grib")) @pytest.fixture(scope="module") def chem_cams_grib_source(): - print("HAHA chem-cams.grib") return from_source("sample", "chem-cams.grib") @pytest.fixture(scope="module") def optical_cams_grib_source(): - print("HAHA chem-cams.grib") return from_source("sample", "optical-cams.grib") @@ -88,7 +79,6 @@ def source( chem_cams_grib_source, optical_cams_grib_source, ): - print(f"HAHA...: {request.param}") if request.param == "pl.grib": return pl_grib_source elif request.param == 
"pl_small.grib": From 4e7c6685a26f0a3119cdeaed4e0d51d884e244ed Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 5 Jun 2026 19:20:26 +0200 Subject: [PATCH 09/21] chem_variable -> chem --- docs/source/concepts/xarray/dim.rst | 6 +- .../xr_engine/xarray_engine_chem.ipynb | 8 +-- .../data/field/component/parameter.py | 62 +++++++++---------- src/earthkit/data/field/grib/parameter.py | 2 +- src/earthkit/data/field/mars/parameter.py | 4 +- src/earthkit/data/field/xarray/parameter.py | 2 +- src/earthkit/data/xr_engine/dim.py | 6 +- .../data/xr_engine/profiles/defaults.yaml | 4 +- .../data/xr_engine/profiles/earthkit.yaml | 2 +- .../data/xr_engine/profiles/grib.yaml | 2 +- .../data/xr_engine/profiles/mars.yaml | 2 +- tests/field/test_parameter_component.py | 16 ++--- tests/grib/test_grib_parameter.py | 4 +- tests/xr_engine/test_xr_engine_dims.py | 12 ++-- 14 files changed, 66 insertions(+), 66 deletions(-) diff --git a/docs/source/concepts/xarray/dim.rst b/docs/source/concepts/xarray/dim.rst index af165950c..772269fb0 100644 --- a/docs/source/concepts/xarray/dim.rst +++ b/docs/source/concepts/xarray/dim.rst @@ -35,9 +35,9 @@ The predefined dimensions are based on the ``dim_roles``, which is a mapping bet * - "member" - Ensemble forecast member - "ensemble.member" - * - "chem_variable" + * - "chem" - Aerosol type, or chemical or physical constituent type - - "parameter.chem_variable" + - "parameter.chem" * - "wavelength" - Optical wavelength in nanometers (e.g. for aerosol optical depth) - "parameter.wavelength" @@ -159,7 +159,7 @@ Chemical and optical dimensions The following dimensions are applicable for chemical and optical parameters (see for example CAMS datasets): -- ``"chem_variable"``: Indicates an aerosol type, chemical specie, etc. (for example, for the parameter representing *mass mixing ratio*, the coordinates can be ``"CO"``, ``"O3"``, etc.). +- ``"chem"``: Indicates an aerosol type, chemical specie, etc. 
(for example, for the parameter representing *mass mixing ratio*, the coordinates can be ``"CO"``, ``"O3"``, etc.). - ``"wavelength"`` (*nm*): Wavelength at which the optical parameter is measured, modelled or reported. diff --git a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb index 420311ec4..81c8dc7d3 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb @@ -27,10 +27,10 @@ "id": "c1d2e3f4", "metadata": {}, "source": [ - "### Chemical variable dimension\n", + "### Chemical dimension\n", "\n", "Parameters which involve aerosol type or chemical constituent type should have a relevant metadata exposed via\n", - "earthkit's `parameter.chem_variable` and `parameter.chem_long_name` metadata keys. To illustrate this, consider\n", + "earthkit's `parameter.chem` and `parameter.chem_long_name` metadata keys. To illustrate this, consider\n", "the following CAMS atmospheric composition dataset:" ] }, @@ -120,7 +120,7 @@ } ], "source": [ - "fl_chem.ls(keys=[\"parameter.variable\", \"parameter.chem_variable\", \"parameter.chem_long_name\"])" + "fl_chem.ls(keys=[\"parameter.variable\", \"parameter.chem\", \"parameter.chem_long_name\"])" ] }, { @@ -400,7 +400,7 @@ "id": "843a0476-db89-4c2d-a8b8-745ed7f927b0", "metadata": {}, "source": [ - "The conversion into Xarray constructs the ``\"chem_variable\"`` dimension automatically:" + "The conversion into Xarray constructs the ``\"chem\"`` dimension automatically:" ] }, { diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 808e282ed..8dd5d18a5 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -26,10 +26,10 @@ class ParameterBase(SimpleFieldComponent): This class defines the interface for parameter components, which can represent different types of parameter 
information. Some of the methods may not be applicable to all parameter - types (e.g. :meth:`chem_variable`), and may return None. + types (e.g. :meth:`chem`), and may return None. The parameter information can be accessed by methods like :meth:`variable`, - :meth:`units`, and :meth:`chem_variable`. Each of these methods has an associated key + :meth:`units`, and :meth:`chem`. Each of these methods has an associated key that can be used in the :meth:`get` method to retrieve the corresponding information. The list of supported keys are as follows: @@ -38,8 +38,8 @@ class ParameterBase(SimpleFieldComponent): on the CF standard name - "long_name": string representing the long name of the parameter variable - "units": as a string or a :class:`Units` object representing the parameter units - - "chem_variable": string representing the parameter chemical variable, or None - - "chem_long_name": string representing the long name of the parameter chemical variable, or None + - "chem": string representing the parameter chemical constituent or aerosol type, or None + - "chem_long_name": string representing the long name of the parameter chemical constituent or aerosol type, or None - "wavelength": int representing the optical parameter wavelength in nanometers, or a 2-tuple of ints representing the wavelength range in nanometers, or None - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None @@ -47,7 +47,7 @@ class ParameterBase(SimpleFieldComponent): - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported - and will return None in the subclasses. For example, the "chem_variable" key is only supported + and will return None in the subclasses. For example, the "chem" key is only supported for chemical parameters, and will return None for other parameter types. 
Typically, this object is used as a component of a field, and can be accessed via the :attr:`parameter` @@ -98,21 +98,21 @@ def variable(self) -> Optional[str]: def units(self) -> Optional["Units"]: r"""Return the parameter units. - The parameter units are :class:`Units` objects. The units are are based on Pint (when possible) + The parameter units are :class:`Units` objects. The units are based on Pint (when possible) and are normalised to a standard form. They can be used for unit conversions and comparisons. """ pass @mark_get_key @abstractmethod - def chem_variable(self) -> Optional[str]: - r"""Return the parameter chemical variable.""" + def chem(self) -> Optional[str]: + r"""Return the parameter chemical constituent or aerosol type.""" pass @mark_get_key @abstractmethod def chem_long_name(self) -> Optional[str]: - r"""Return the long name of the parameter chemical variable.""" + r"""Return the long name of the parameter chemical constituent or aerosol type.""" pass @mark_alias("variable") @@ -179,7 +179,7 @@ def create_parameter(d: dict) -> "ParameterBase": allowed_keys=( "variable", "units", - "chem_variable", + "chem", "chem_long_name", "standard_name", "long_name", @@ -225,15 +225,15 @@ def units(self) -> None: """ return None - def chem_variable(self) -> None: - r"""Return the parameter chemical variable. + def chem(self) -> None: + r"""Return the parameter chemical constituent or aerosol type. An EmptyParameter does not contain any parameter information, and this method returns None. """ return None def chem_long_name(self) -> None: - r"""Return the long name of the parameter chemical variable. + r"""Return the long name of the parameter chemical constituent or aerosol type. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -298,10 +298,10 @@ class Parameter(ParameterBase): The long name of the parameter variable, by default None. units : str or Units, optional The parameter units, by default None. 
Can be provided as a string or a Units object. - chem_variable : str, optional - The parameter chemical variable, by default None. + chem : str, optional + The parameter chemical constituent or aerosol type, by default None. chem_long_name : str, optional - The long name of the parameter chemical variable, by default None. + The long name of the parameter chemical constituent or aerosol type, by default None. wavelength : int or 2-tuple of ints, optional The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. wave_direction : float, optional @@ -310,7 +310,7 @@ class Parameter(ParameterBase): The wave frequency in Hz of the 2D spectra parameter, by default None. """ - _chem_variable = None + _chem = None _chem_long_name = None _wavelength = None _wave_direction = None @@ -322,7 +322,7 @@ def __init__( standard_name: str = None, long_name: str = None, units: Union[str, "Units"] = None, - chem_variable: str = None, + chem: str = None, chem_long_name: str = None, wavelength: Union[int, tuple[int, int]] = None, wave_direction: float = None, @@ -332,8 +332,8 @@ def __init__( self._standard_name = standard_name self._long_name = long_name self._units = Units.from_any(units) - if chem_variable is not None: - self._chem_variable = chem_variable + if chem is not None: + self._chem = chem if chem_long_name is not None: self._chem_long_name = chem_long_name if wavelength is not None: @@ -355,8 +355,8 @@ def long_name(self) -> Optional[str]: def units(self) -> Optional["Units"]: return self._units - def chem_variable(self) -> Optional[str]: - return self._chem_variable + def chem(self) -> Optional[str]: + return self._chem def chem_long_name(self) -> Optional[str]: return self._chem_long_name @@ -385,8 +385,8 @@ def from_dict(cls, d: dict) -> "Parameter": - "standard_name": The standard name of the parameter variable. - "long_name": The long name of the parameter variable. 
- "units": The parameter units, as a string or a Units object. - - "chem_variable": The chemical variable of the parameter. - - "chem_long_name": The long name of the chemical variable of the parameter. + - "chem": The chemical constituent or aerosol type of the parameter. + - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, as an int or a 2-tuple of ints. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. @@ -405,7 +405,7 @@ def to_dict(self): "standard_name": self._standard_name, "long_name": self._long_name, "units": str(self._units), - "chem_variable": self._chem_variable, + "chem": self._chem, "chem_long_name": self._chem_long_name, "wavelength": self._wavelength, "wave_direction": self._wave_direction, @@ -418,7 +418,7 @@ def __getstate__(self): state["standard_name"] = self._standard_name state["long_name"] = self._long_name state["units"] = str(self._units) - state["chem_variable"] = self._chem_variable + state["chem"] = self._chem state["chem_long_name"] = self._chem_long_name state["wavelength"] = self._wavelength state["wave_direction"] = self._wave_direction @@ -431,7 +431,7 @@ def __setstate__(self, state): standard_name=state["standard_name"], long_name=state["long_name"], units=state["units"], - chem_variable=state["chem_variable"], + chem=state["chem"], chem_long_name=state["chem_long_name"], wavelength=state["wavelength"], wave_direction=state.get("wave_direction"), @@ -454,8 +454,8 @@ def set(self, *args, **kwargs): - "units": The parameter units, as a string or a Units object. - "standard_name": The standard name of the parameter variable. - "long_name": The long name of the parameter variable. - - "chem_variable": The chemical variable of the parameter. - - "chem_long_name": The long name of the chemical variable of the parameter. 
+ - "chem": The chemical constituent or aerosol type of the parameter. + - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. @@ -465,7 +465,7 @@ def set(self, *args, **kwargs): allowed_keys=( "variable", "units", - "chem_variable", + "chem", "chem_long_name", "standard_name", "long_name", @@ -481,7 +481,7 @@ def set(self, *args, **kwargs): "standard_name": self._standard_name, "long_name": self._long_name, "units": self._units, - "chem_variable": self._chem_variable, + "chem": self._chem, "chem_long_name": self._chem_long_name, "wavelength": self._wavelength, "wave_direction": self._wave_direction, diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index b7e3ff09a..8a89ff9da 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -93,7 +93,7 @@ def _scale_value(v, scaling_factor): standard_name=standard_name, long_name=long_name, units=units, - chem_variable=chem_name, + chem=chem_name, chem_long_name=chem_long_name, wavelength=wavelength, wave_direction=wave_direction, diff --git a/src/earthkit/data/field/mars/parameter.py b/src/earthkit/data/field/mars/parameter.py index d9e0b8033..95970d56c 100644 --- a/src/earthkit/data/field/mars/parameter.py +++ b/src/earthkit/data/field/mars/parameter.py @@ -28,8 +28,8 @@ def build(request, build_empty=False): @staticmethod def _build_dict(request): - # TODO: add chem_variable and wavelength? - # TODO: chem_variable would require an unaliasing table "grib-chemid.csv" + # TODO: add chem and wavelength? 
+ # TODO: chem would require an unaliasing table "grib-chemid.csv" param = request.get("param", None) if param is None: diff --git a/src/earthkit/data/field/xarray/parameter.py b/src/earthkit/data/field/xarray/parameter.py index fb26af66d..31925fcab 100644 --- a/src/earthkit/data/field/xarray/parameter.py +++ b/src/earthkit/data/field/xarray/parameter.py @@ -35,7 +35,7 @@ def __init__(self, owner, selection=None) -> None: standard_name = owner.variable.attrs.get("standard_name", "unknown") long_name = owner.variable.attrs.get("long_name", "unknown") units = owner.variable.attrs.get("units", None) - # TODO: add "chem_variable", "wavelength", "wave_direction", "wave_frequency" + # TODO: add "chem", "wavelength", "wave_direction", "wave_frequency" # would need a similar mechanism to the one in the field/xarray/ensemble.py module p = Parameter.from_dict(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) super().__init__(p) diff --git a/src/earthkit/data/xr_engine/dim.py b/src/earthkit/data/xr_engine/dim.py index 98ba3a397..1365d550b 100644 --- a/src/earthkit/data/xr_engine/dim.py +++ b/src/earthkit/data/xr_engine/dim.py @@ -48,7 +48,7 @@ def _get_metadata_keys(keys): _ENS_KEYS = ["member", "realisation", "realization"] ENS_KEYS = ["member"] + _get_component_keys("ensemble", _ENS_KEYS) + _get_metadata_keys(_GRIB_ENS_KEYS) -CHEM_KEYS = ["chem_variable"] + _get_component_keys("parameter", ["chem_variable"]) +CHEM_KEYS = ["chem"] + _get_component_keys("parameter", ["chem"]) WAVELENGTH_KEYS = ["wavelength"] + _get_component_keys("parameter", ["wavelength"]) WAVE_DIRECTION_KEYS = ["wave_direction"] + _get_component_keys("parameter", ["wave_direction"]) WAVE_FREQUENCY_KEYS = ["wave_frequency"] + _get_component_keys("parameter", ["wave_frequency"]) @@ -483,7 +483,7 @@ class DimRole: "level_type", "forecast_reference_time", "valid_time", - "chem_variable", + "chem", "wavelength", "wave_direction", "wave_frequency", @@ -623,7 +623,7 @@ class 
MemberDimBuilder(DimBuilder): class ChemDimBuilder(DimBuilder): - name = "chem_variable" + name = "chem" dim_class = ChemDim diff --git a/src/earthkit/data/xr_engine/profiles/defaults.yaml b/src/earthkit/data/xr_engine/profiles/defaults.yaml index 9cd79a634..3b23f2094 100644 --- a/src/earthkit/data/xr_engine/profiles/defaults.yaml +++ b/src/earthkit/data/xr_engine/profiles/defaults.yaml @@ -47,7 +47,7 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type - chem_variable: parameter.chem_variable + chem: parameter.chem wavelength: parameter.wavelength wave_direction: parameter.wave_direction wave_frequency: parameter.wave_frequency @@ -56,7 +56,7 @@ coord_attrs: member: standard_name: realization long_name: ensemble member id - chem_variable: + chem: long_name: atmospheric chemical or physical constituent type wavelength: standard_name: radiation_wavelength diff --git a/src/earthkit/data/xr_engine/profiles/earthkit.yaml b/src/earthkit/data/xr_engine/profiles/earthkit.yaml index ac0cf42b1..9324aecdd 100644 --- a/src/earthkit/data/xr_engine/profiles/earthkit.yaml +++ b/src/earthkit/data/xr_engine/profiles/earthkit.yaml @@ -7,7 +7,7 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type - chem_variable: parameter.chem_variable + chem: parameter.chem wavelength: parameter.wavelength wave_direction: parameter.wave_direction wave_frequency: parameter.wave_frequency diff --git a/src/earthkit/data/xr_engine/profiles/grib.yaml b/src/earthkit/data/xr_engine/profiles/grib.yaml index e88cc0206..356e12a7e 100644 --- a/src/earthkit/data/xr_engine/profiles/grib.yaml +++ b/src/earthkit/data/xr_engine/profiles/grib.yaml @@ -7,7 +7,7 @@ dim_roles: time: metadata.time level: metadata.level level_type: metadata.typeOfLevel - chem_variable: parameter.chem_variable + chem: parameter.chem wavelength: parameter.wavelength wave_direction: parameter.wave_direction wave_frequency: parameter.wave_frequency diff --git 
a/src/earthkit/data/xr_engine/profiles/mars.yaml b/src/earthkit/data/xr_engine/profiles/mars.yaml index 5196c0c71..6362dc15b 100644 --- a/src/earthkit/data/xr_engine/profiles/mars.yaml +++ b/src/earthkit/data/xr_engine/profiles/mars.yaml @@ -7,7 +7,7 @@ dim_roles: time: metadata.time level: metadata.levelist level_type: metadata.levtype - chem_variable: parameter.chem_variable + chem: parameter.chem wavelength: parameter.wavelength wave_direction: parameter.wave_direction wave_frequency: parameter.wave_frequency diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index bfff8493a..7fe823b03 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -21,7 +21,7 @@ def test_parameter_component_alias_1(): assert r.units() == "K" assert r.standard_name() is None assert r.long_name() is None - assert r.chem_variable() is None + assert r.chem() is None assert r.chem_long_name() is None assert r.wavelength() is None assert r.wave_direction() is None @@ -51,7 +51,7 @@ def test_parameter_component_alias_1(): "standard_name": "unknown", "long_name": "Aerosol optical depth", "units": "Numeric", - "chem_variable": "aer_total", + "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, "wave_direction": None, @@ -63,7 +63,7 @@ def test_parameter_component_alias_1(): "standard_name": "unknown", "long_name": "Aerosol optical depth", "units": "Numeric", - "chem_variable": "aer_total", + "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, "wave_direction": None, @@ -86,7 +86,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): assert r.units() == ref["units"] assert r.standard_name() == ref["standard_name"] assert r.long_name() == ref["long_name"] - assert r.chem_variable() == ref.get("chem_variable", None) + assert r.chem() == ref.get("chem", None) assert r.chem_long_name() == ref.get("chem_long_name", None) assert r.wavelength() == 
ref.get("wavelength", None) assert r.wave_direction() == ref.get("wave_direction", None) @@ -113,7 +113,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): "units": "K", "standard_name": None, "long_name": None, - "chem_variable": None, + "chem": None, "chem_long_name": None, "wavelength": None, "wave_direction": None, @@ -127,7 +127,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): "standard_name": "unknown", "long_name": "Aerosol optical depth", "units": "Numeric", - "chem_variable": "aer_total", + "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, } @@ -138,7 +138,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): "standard_name": "unknown", "long_name": "Aerosol optical depth", "units": "Numeric", - "chem_variable": "aer_total", + "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, "wave_direction": None, @@ -162,7 +162,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): "standard_name": "unknown", "long_name": "2D wave spectra (single)", "units": "meter ** 2 * second / radian", - "chem_variable": None, + "chem": None, "chem_long_name": None, "wavelength": None, "wave_direction": 5.0, diff --git a/tests/grib/test_grib_parameter.py b/tests/grib/test_grib_parameter.py index 437cd56a1..068058f37 100644 --- a/tests/grib/test_grib_parameter.py +++ b/tests/grib/test_grib_parameter.py @@ -63,7 +63,7 @@ def test_grib_parameter_chem(fl_type): assert f.parameter.variable() == "tcvimd" assert f.parameter.param() == "tcvimd" - assert f.parameter.chem_variable() == "CO" + assert f.parameter.chem() == "CO" assert f.parameter.units() == "kg m**-2" @@ -84,7 +84,7 @@ def test_grib_parameter_chem_long_name(fl_type): assert f.parameter.variable() == var assert f.parameter.param() == var assert f.parameter.units() == "dimensionless" - assert f.parameter.chem_variable() == chem + assert f.parameter.chem() == chem assert f.parameter.chem_long_name() == chem_long assert f.parameter.wavelength() 
is None assert f.parameter.wave_direction() is None diff --git a/tests/xr_engine/test_xr_engine_dims.py b/tests/xr_engine/test_xr_engine_dims.py index 6f234fb0c..66349c667 100644 --- a/tests/xr_engine/test_xr_engine_dims.py +++ b/tests/xr_engine/test_xr_engine_dims.py @@ -674,14 +674,14 @@ def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, "step": [datetime.timedelta(0)], "level": [1], "level_type": ["hybrid"], - "chem_variable": ["CO", "HCHO", "O3"], + "chem": ["CO", "HCHO", "O3"], }, { "forecast_reference_time": 1, "step": 1, "level": 1, "level_type": 1, - "chem_variable": 3, + "chem": 3, }, { "mass_mixrat": { @@ -690,7 +690,7 @@ def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, "units": "dimensionless", "level_type": "hybrid", }, - "chem_variable": { + "chem": { "long_name": "atmospheric chemical or physical constituent type", }, }, @@ -709,7 +709,7 @@ def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, "step": [datetime.timedelta(0)], "level": [0], "level_type": ["surface"], - "chem_variable": ["aer_sm", "aer_total"], + "chem": ["aer_sm", "aer_total"], "wavelength": [550, 800], }, { @@ -717,7 +717,7 @@ def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, "step": 1, "level": 1, "level_type": 1, - "chem_variable": 2, + "chem": 2, "wavelength": 2, }, { @@ -727,7 +727,7 @@ def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, "units": "Numeric", "level_type": "surface", }, - "chem_variable": { + "chem": { "long_name": "atmospheric chemical or physical constituent type", }, "wavelength": { From 559e5052ac8d7d0f9594b24516c26ffc653c8873 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 5 Jun 2026 22:15:36 +0200 Subject: [PATCH 10/21] Vertical: __print__() -> __str__() --- src/earthkit/data/field/component/vertical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/earthkit/data/field/component/vertical.py b/src/earthkit/data/field/component/vertical.py index 8ca890ec7..12dce4d2a 100644 --- a/src/earthkit/data/field/component/vertical.py +++ b/src/earthkit/data/field/component/vertical.py @@ -313,8 +313,8 @@ def positive(self) -> Optional[str]: def level_type(self) -> str: return self._type.name - def __print__(self) -> str: - return f"{self.level} {self.units} ({self.abbreviation})" + def __str__(self) -> str: + return f"{self.level()} {self.units()} ({self.abbreviation()})" def __repr__(self) -> str: return f"{self.__class__.__name__}(level={self.level()}, units={self.units()}, level_type={self._type.name})" From 7d42c329c47bb3f28d026a50df54b35fbf846328 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Mon, 8 Jun 2026 19:00:42 +0200 Subject: [PATCH 11/21] Parameter is subclassed --- src/earthkit/data/core/field.py | 4 +- .../data/field/component/parameter.py | 367 +++++++++++++----- src/earthkit/data/field/geotiff/create.py | 4 +- src/earthkit/data/field/geotiff/parameter.py | 4 +- src/earthkit/data/field/grib/parameter.py | 15 +- src/earthkit/data/field/mars/parameter.py | 10 +- src/earthkit/data/field/xarray/parameter.py | 4 +- tests/field/test_parameter_component.py | 117 +++++- 8 files changed, 423 insertions(+), 102 deletions(-) diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 337ea5b7f..60d377915 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1729,8 +1729,8 @@ def describe(self, component=all, filter=None, **kwargs): Returns ------- NamespaceDump - Dict-like object with one item s per component. In a Jupyter notebook represented - as a tabbed interface to browse the dump con stents. + Dict-like object with one item per component. In a Jupyter notebook represented + as a tabbed interface to browse the dump contents. 
See Also diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 8dd5d18a5..b869a8846 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -9,6 +9,7 @@ from __future__ import annotations +import inspect from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Union @@ -160,6 +161,14 @@ def wave_frequency(self) -> Optional[float]: def create_parameter(d: dict) -> "ParameterBase": """Create a ParameterBase object from a dictionary. + The appropriate subclass is determined automatically based on the dictionary contents: + + - If both ``chem`` and ``wavelength`` are present, a :class:`ChemicalOpticalParameter` is created. + - If only ``chem`` is present, a :class:`ChemicalParameter` is created. + - If only ``wavelength`` is present, an :class:`OpticalParameter` is created. + - If ``wave_direction`` or ``wave_frequency`` is present, a :class:`WaveSpectraParameter` is created. + - Otherwise, a :class:`Parameter` is created. + Parameters ---------- d : dict @@ -168,7 +177,7 @@ def create_parameter(d: dict) -> "ParameterBase": Returns ------- ParameterBase - The created ParameterBase instance. + The created parameter instance. The actual type depends on the dictionary contents. 
""" if not isinstance(d, dict): raise TypeError(f"Cannot create Parameter from {type(d)}, expected dict") @@ -191,7 +200,27 @@ def create_parameter(d: dict) -> "ParameterBase": if "variable" not in d1: raise ValueError("Cannot create Parameter without variable") - return cls(**d1) + has_chem = d1.get("chem") is not None + has_wavelength = d1.get("wavelength") is not None + has_wave_spectra = d1.get("wave_direction") is not None or d1.get("wave_frequency") is not None + + if has_chem and has_wavelength: + cls = ChemicalOpticalParameter + elif has_chem: + cls = ChemicalParameter + elif has_wavelength: + cls = OpticalParameter + elif has_wave_spectra: + cls = WaveSpectraParameter + else: + cls = Parameter + + # Filter d1 to only include keys accepted by the chosen class's __init__ + sig = inspect.signature(cls.__init__) + valid_params = set(sig.parameters.keys()) - {"self"} + filtered = {k: v for k, v in d1.items() if k in valid_params} + + return cls(**filtered) class EmptyParameter(ParameterBase): @@ -286,7 +315,13 @@ def __setstate__(self, state): class Parameter(ParameterBase): - """Parameter component representing parameter information. + """Parameter component representing a regular parameter. + + A regular parameter is one that does not have any chemical, optical, or wave spectra + properties. For parameters with chemical constituents, use :class:`ChemicalParameter`. + For parameters with optical wavelength information, use :class:`OpticalParameter`. + For parameters with both chemical and optical properties, use :class:`ChemicalOpticalParameter`. + For parameters with wave spectra properties, use :class:`WaveSpectraParameter`. Parameters ---------- @@ -298,50 +333,19 @@ class Parameter(ParameterBase): The long name of the parameter variable, by default None. units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. 
- chem : str, optional - The parameter chemical constituent or aerosol type, by default None. - chem_long_name : str, optional - The long name of the parameter chemical constituent or aerosol type, by default None. - wavelength : int or 2-tuple of ints, optional - The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. - wave_direction : float, optional - The wave direction in degrees of the 2D spectra parameter, by default None. - wave_frequency : float, optional - The wave frequency in Hz of the 2D spectra parameter, by default None. """ - _chem = None - _chem_long_name = None - _wavelength = None - _wave_direction = None - _wave_frequency = None - def __init__( self, variable: str = None, standard_name: str = None, long_name: str = None, units: Union[str, "Units"] = None, - chem: str = None, - chem_long_name: str = None, - wavelength: Union[int, tuple[int, int]] = None, - wave_direction: float = None, - wave_frequency: float = None, ) -> None: self._variable = variable self._standard_name = standard_name self._long_name = long_name self._units = Units.from_any(units) - if chem is not None: - self._chem = chem - if chem_long_name is not None: - self._chem_long_name = chem_long_name - if wavelength is not None: - self._wavelength = wavelength - if wave_direction is not None: - self._wave_direction = wave_direction - if wave_frequency is not None: - self._wave_frequency = wave_frequency def variable(self) -> Optional[str]: return self._variable @@ -355,24 +359,46 @@ def long_name(self) -> Optional[str]: def units(self) -> Optional["Units"]: return self._units - def chem(self) -> Optional[str]: - return self._chem + def chem(self) -> None: + r"""Return the parameter chemical constituent or aerosol type. - def chem_long_name(self) -> Optional[str]: - return self._chem_long_name + A regular Parameter does not have chemical information, and this method returns None. 
+ """ + return None - def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: - return self._wavelength + def chem_long_name(self) -> None: + r"""Return the long name of the parameter chemical constituent or aerosol type. - def wave_direction(self) -> Optional[float]: - return self._wave_direction + A regular Parameter does not have chemical information, and this method returns None. + """ + return None - def wave_frequency(self) -> Optional[float]: - return self._wave_frequency + def wavelength(self) -> None: + r"""Return the optical parameter wavelength or wavelength interval in nanometers. + + A regular Parameter does not have optical information, and this method returns None. + """ + return None + + def wave_direction(self) -> None: + r"""Return the wave direction in degrees of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency(self) -> None: + r"""Return the wave frequency in Hz of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None @classmethod - def from_dict(cls, d: dict) -> "Parameter": - """Create a Parameter object from a dictionary. + def from_dict(cls, d: dict) -> "ParameterBase": + """Create a parameter object from a dictionary. + + The appropriate subclass is determined automatically based on the dictionary contents. Parameters ---------- @@ -394,53 +420,34 @@ def from_dict(cls, d: dict) -> "Parameter": Returns ------- - Parameter - The created Parameter instance. + ParameterBase + The created parameter instance. The actual type depends on the dictionary contents: + :class:`ChemicalOpticalParameter`, :class:`ChemicalParameter`, + :class:`OpticalParameter`, :class:`WaveSpectraParameter`, or :class:`Parameter`. 
""" return create_parameter(d) def to_dict(self): + """Return a dictionary representation of the parameter.""" return { "variable": self._variable, "standard_name": self._standard_name, "long_name": self._long_name, "units": str(self._units), - "chem": self._chem, - "chem_long_name": self._chem_long_name, - "wavelength": self._wavelength, - "wave_direction": self._wave_direction, - "wave_frequency": self._wave_frequency, } def __getstate__(self): - state = {} - state["variable"] = self._variable - state["standard_name"] = self._standard_name - state["long_name"] = self._long_name - state["units"] = str(self._units) - state["chem"] = self._chem - state["chem_long_name"] = self._chem_long_name - state["wavelength"] = self._wavelength - state["wave_direction"] = self._wave_direction - state["wave_frequency"] = self._wave_frequency - return state + return self.to_dict() def __setstate__(self, state): - self.__init__( - variable=state["variable"], - standard_name=state["standard_name"], - long_name=state["long_name"], - units=state["units"], - chem=state["chem"], - chem_long_name=state["chem_long_name"], - wavelength=state["wavelength"], - wave_direction=state.get("wave_direction"), - wave_frequency=state.get("wave_frequency"), - ) + self.__init__(**state) def set(self, *args, **kwargs): """Create a new instance with updated data. + The returned instance type is determined by the resulting dictionary contents, + which may differ from the current instance type. 
+ Parameters ---------- args : tuple @@ -476,17 +483,201 @@ def set(self, *args, **kwargs): **kwargs, ) - current = { - "variable": self._variable, - "standard_name": self._standard_name, - "long_name": self._long_name, - "units": self._units, - "chem": self._chem, - "chem_long_name": self._chem_long_name, - "wavelength": self._wavelength, - "wave_direction": self._wave_direction, - "wave_frequency": self._wave_frequency, - } - + current = self.to_dict() current.update(d) - return self.from_dict(current) + return create_parameter(current) + + +class ChemicalParameter(Parameter): + """Parameter component representing a chemical parameter. + + A chemical parameter includes a chemical constituent or aerosol type identifier. + For parameters that also have optical wavelength information, use + :class:`ChemicalOpticalParameter`. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. + chem : str, optional + The parameter chemical constituent or aerosol type, by default None. + chem_long_name : str, optional + The long name of the parameter chemical constituent or aerosol type, by default None. 
+ """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + chem: str = None, + chem_long_name: str = None, + ) -> None: + Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._chem = chem + self._chem_long_name = chem_long_name + + def chem(self) -> Optional[str]: + r"""Return the parameter chemical constituent or aerosol type.""" + return self._chem + + def chem_long_name(self) -> Optional[str]: + r"""Return the long name of the parameter chemical constituent or aerosol type.""" + return self._chem_long_name + + def to_dict(self): + """Return a dictionary representation of the chemical parameter.""" + d = Parameter.to_dict(self) + d["chem"] = self._chem + d["chem_long_name"] = self._chem_long_name + return d + + +class OpticalParameter(Parameter): + """Parameter component representing an optical parameter. + + An optical parameter includes a wavelength or wavelength range but no chemical + constituent. For parameters that have both chemical and optical properties, use + :class:`ChemicalOpticalParameter`. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. + wavelength : int or 2-tuple of ints, optional + The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. 
+ """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + wavelength: Union[int, tuple[int, int]] = None, + ) -> None: + Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._wavelength = wavelength + + def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: + r"""Return the optical parameter wavelength or wavelength interval in nanometers.""" + return self._wavelength + + def to_dict(self): + """Return a dictionary representation of the optical parameter.""" + d = Parameter.to_dict(self) + d["wavelength"] = self._wavelength + return d + + +class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter): + """Parameter component representing a chemical-optical parameter. + + A chemical-optical parameter includes both a chemical constituent or aerosol type + and an optical wavelength or wavelength range. It inherits chemical properties from + :class:`ChemicalParameter` and optical properties from :class:`OpticalParameter`. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. + chem : str, optional + The parameter chemical constituent or aerosol type, by default None. + chem_long_name : str, optional + The long name of the parameter chemical constituent or aerosol type, by default None. + wavelength : int or 2-tuple of ints, optional + The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. 
+ """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + chem: str = None, + chem_long_name: str = None, + wavelength: Union[int, tuple[int, int]] = None, + ) -> None: + Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._chem = chem + self._chem_long_name = chem_long_name + self._wavelength = wavelength + + def to_dict(self): + """Return a dictionary representation of the chemical-optical parameter.""" + d = Parameter.to_dict(self) + d["chem"] = self._chem + d["chem_long_name"] = self._chem_long_name + d["wavelength"] = self._wavelength + return d + + +class WaveSpectraParameter(Parameter): + """Parameter component representing a wave spectra parameter. + + A wave spectra parameter includes wave direction and/or wave frequency information + from 2D wave spectra fields. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. + wave_direction : float, optional + The wave direction in degrees of the 2D spectra parameter, by default None. + wave_frequency : float, optional + The wave frequency in Hz of the 2D spectra parameter, by default None. 
+ """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + wave_direction: float = None, + wave_frequency: float = None, + ) -> None: + Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._wave_direction = wave_direction + self._wave_frequency = wave_frequency + + def wave_direction(self) -> Optional[float]: + r"""Return the wave direction in degrees of the 2D spectra parameter.""" + return self._wave_direction + + def wave_frequency(self) -> Optional[float]: + r"""Return the wave frequency in Hz of the 2D spectra parameter.""" + return self._wave_frequency + + def to_dict(self): + """Return a dictionary representation of the wave spectra parameter.""" + d = Parameter.to_dict(self) + d["wave_direction"] = self._wave_direction + d["wave_frequency"] = self._wave_frequency + return d diff --git a/src/earthkit/data/field/geotiff/create.py b/src/earthkit/data/field/geotiff/create.py index d52d29610..809842620 100644 --- a/src/earthkit/data/field/geotiff/create.py +++ b/src/earthkit/data/field/geotiff/create.py @@ -11,13 +11,13 @@ def create_geotiff_field(band, da): r"""Create a Field object from GeoTIFF Xarray dataarray.""" from earthkit.data.core.field import Field - from earthkit.data.field.component.parameter import Parameter + from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.geotiff.data import GeoTIFFData from earthkit.data.field.geotiff.geography import GeoTIFFGeography from earthkit.data.field.handler.labels import SimpleLabels data = GeoTIFFData(da) - parameter = Parameter.from_dict({"variable": da.name}) + parameter = create_parameter({"variable": da.name}) geography = GeoTIFFGeography(da) labels = SimpleLabels(band=band, **da.attrs) diff --git a/src/earthkit/data/field/geotiff/parameter.py b/src/earthkit/data/field/geotiff/parameter.py index a1b791529..fd1b078e3 
100644 --- a/src/earthkit/data/field/geotiff/parameter.py +++ b/src/earthkit/data/field/geotiff/parameter.py @@ -9,7 +9,7 @@ import logging -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponent LOG = logging.getLogger(__name__) @@ -33,5 +33,5 @@ def __init__(self, owner, selection=None) -> None: # self.owner = owner name = owner.name units = owner.variable.attrs.get("units", None) - spec = Parameter.from_dict(dict(variable=name, units=units)) + spec = create_parameter(dict(variable=name, units=units)) super().__init__(spec) diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index 8a89ff9da..e73d1ac8c 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -12,13 +12,24 @@ class GribParameterBuilder: + """Builder for creating parameter components from GRIB message handles. + + This builder extracts parameter metadata from GRIB messages and creates the appropriate + parameter component subclass (:class:`~earthkit.data.field.component.parameter.Parameter`, + :class:`~earthkit.data.field.component.parameter.ChemicalParameter`, + :class:`~earthkit.data.field.component.parameter.OpticalParameter`, + :class:`~earthkit.data.field.component.parameter.ChemicalOpticalParameter`, or + :class:`~earthkit.data.field.component.parameter.WaveSpectraParameter`) based on the + metadata contents. 
+ """ + @staticmethod def build(handle): - from earthkit.data.field.component.parameter import Parameter + from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler d = GribParameterBuilder._build_dict(handle) - component = Parameter.from_dict(d) + component = create_parameter(d) handler = ParameterFieldComponentHandler.from_component(component) return handler diff --git a/src/earthkit/data/field/mars/parameter.py b/src/earthkit/data/field/mars/parameter.py index 95970d56c..81f4d18e9 100644 --- a/src/earthkit/data/field/mars/parameter.py +++ b/src/earthkit/data/field/mars/parameter.py @@ -13,16 +13,22 @@ class MarsParameterBuilder: + """Builder for creating parameter components from MARS requests. + + This builder extracts parameter metadata from MARS request dictionaries and creates + the appropriate parameter component subclass using :func:`create_parameter`. + """ + @staticmethod def build(request, build_empty=False): - from earthkit.data.field.component.parameter import Parameter + from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler d = MarsParameterBuilder._build_dict(request) if not d and not build_empty: return None - component = Parameter.from_dict(d) + component = create_parameter(d) handler = ParameterFieldComponentHandler.from_component(component) return handler diff --git a/src/earthkit/data/field/xarray/parameter.py b/src/earthkit/data/field/xarray/parameter.py index 31925fcab..07b9f6a2f 100644 --- a/src/earthkit/data/field/xarray/parameter.py +++ b/src/earthkit/data/field/xarray/parameter.py @@ -9,7 +9,7 @@ import logging -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler LOG = logging.getLogger(__name__) 
@@ -37,5 +37,5 @@ def __init__(self, owner, selection=None) -> None: units = owner.variable.attrs.get("units", None) # TODO: add "chem", "wavelength", "wave_direction", "wave_frequency" # would need a similar mechanism to the one in the field/xarray/ensemble.py module - p = Parameter.from_dict(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) + p = create_parameter(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) super().__init__(p) diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index 7fe823b03..4e071c111 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -11,7 +11,14 @@ import pytest -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import ( + ChemicalOpticalParameter, + ChemicalParameter, + OpticalParameter, + Parameter, + WaveSpectraParameter, + create_parameter, +) def test_parameter_component_alias_1(): @@ -192,5 +199,111 @@ def test_parameter_component_set(input_d, ref): def test_parameter_component_wavelength_tuple(): """Test wavelength as a tuple (wavelength range).""" - p = Parameter(variable="aod", wavelength=(400, 700)) + p = OpticalParameter(variable="aod", wavelength=(400, 700)) assert p.wavelength() == (400, 700) + + +def test_parameter_component_create_parameter_regular(): + """Test create_parameter returns a Parameter for regular parameters.""" + p = create_parameter({"variable": "t", "units": "K"}) + assert isinstance(p, Parameter) + assert p.variable() == "t" + assert p.units() == "K" + assert p.chem() is None + assert p.wavelength() is None + assert p.wave_direction() is None + assert p.wave_frequency() is None + + +def test_parameter_component_create_parameter_chemical(): + """Test create_parameter returns a ChemicalParameter for chemical parameters.""" + p = create_parameter({"variable": "co", "units": "kg/kg", "chem": 
"carbon_monoxide", "chem_long_name": "CO"}) + assert isinstance(p, ChemicalParameter) + assert p.variable() == "co" + assert p.chem() == "carbon_monoxide" + assert p.chem_long_name() == "CO" + assert p.wavelength() is None + assert p.wave_direction() is None + + +def test_parameter_component_create_parameter_optical(): + """Test create_parameter returns an OpticalParameter for optical parameters.""" + p = create_parameter({"variable": "aod", "units": "Numeric", "wavelength": 550}) + assert isinstance(p, OpticalParameter) + assert p.variable() == "aod" + assert p.wavelength() == 550 + assert p.chem() is None + + +def test_parameter_component_create_parameter_chemical_optical(): + """Test create_parameter returns a ChemicalOpticalParameter for chemical-optical parameters.""" + p = create_parameter({ + "variable": "aod", + "units": "Numeric", + "chem": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + }) + assert isinstance(p, ChemicalOpticalParameter) + assert p.variable() == "aod" + assert p.chem() == "aer_total" + assert p.chem_long_name() == "Total aerosol" + assert p.wavelength() == 550 + assert p.wave_direction() is None + + +def test_parameter_component_create_parameter_wave_spectra(): + """Test create_parameter returns a WaveSpectraParameter for wave spectra parameters.""" + p = create_parameter({ + "variable": "2dfd", + "units": "m**2 s / rad", + "wave_direction": 5.0, + "wave_frequency": 0.034523, + }) + assert isinstance(p, WaveSpectraParameter) + assert p.variable() == "2dfd" + assert p.wave_direction() == 5.0 + assert p.wave_frequency() == 0.034523 + assert p.chem() is None + assert p.wavelength() is None + + +def test_parameter_component_set_changes_type(): + """Test that set() can change the parameter type.""" + p = Parameter(variable="t", units="K") + assert isinstance(p, Parameter) + + # Add chem -> becomes ChemicalParameter + p2 = p.set(variable="co", chem="carbon_monoxide") + assert isinstance(p2, ChemicalParameter) + assert 
p2.chem() == "carbon_monoxide" + + # Add wavelength to chem -> becomes ChemicalOpticalParameter + p3 = p2.set(wavelength=550) + assert isinstance(p3, ChemicalOpticalParameter) + assert p3.chem() == "carbon_monoxide" + assert p3.wavelength() == 550 + + +def test_parameter_component_inheritance(): + """Test that subclasses have the correct inheritance relationships.""" + cp = ChemicalParameter(variable="co", chem="co") + op = OpticalParameter(variable="aod", wavelength=550) + cop = ChemicalOpticalParameter(variable="aod", chem="aer", wavelength=550) + wp = WaveSpectraParameter(variable="2dfd", wave_direction=5.0) + + # All are instances of Parameter + assert isinstance(cp, Parameter) + assert isinstance(op, Parameter) + assert isinstance(cop, Parameter) + assert isinstance(wp, Parameter) + + # ChemicalOpticalParameter is both Chemical and Optical + assert isinstance(cop, ChemicalParameter) + assert isinstance(cop, OpticalParameter) + + # But not cross-contaminated + assert not isinstance(cp, OpticalParameter) + assert not isinstance(op, ChemicalParameter) + assert not isinstance(wp, ChemicalParameter) + assert not isinstance(wp, OpticalParameter) From c3f698d65f1bca99ade7b3f471aae687494f39dc Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 9 Jun 2026 14:46:14 +0200 Subject: [PATCH 12/21] Inspection of the `__init__()` signature of `ParameterBase` subclasses moved from `create_parameter()` to `SimpleFieldComponent.__init_subclass__()` hook Validation of keys for a component `__init__()` factored out into `SimpleFieldComponent._create_component()` TODO: use the above pattern for other components --- .../data/field/component/component.py | 26 +++++- .../data/field/component/parameter.py | 79 +++++++++---------- 2 files changed, 60 insertions(+), 45 deletions(-) diff --git a/src/earthkit/data/field/component/component.py b/src/earthkit/data/field/component/component.py index c5e766913..9048c582c 100644 --- a/src/earthkit/data/field/component/component.py +++ 
b/src/earthkit/data/field/component/component.py @@ -6,8 +6,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. # - - +import inspect from abc import ABCMeta, abstractmethod from functools import wraps @@ -223,6 +222,13 @@ def __setstate__(self, state): class SimpleFieldComponent(FieldComponent): _KEYS = tuple() _ALIASES = dict() + # keys accepted by a class __init__(); set by __init_subclass__() for every subclass + _ALLOWED_CREATE_KEYS = None + + def __init_subclass__(cls): + super().__init_subclass__() + sig = inspect.signature(cls.__init__) + cls._ALLOWED_CREATE_KEYS = tuple(key for key in sig.parameters.keys() if key not in {"self", "args", "kwargs"}) def __contains__(self, name): """Check if the key is in the component.""" @@ -314,3 +320,19 @@ def _normalise_set_kwargs(cls, *args, allowed_keys=None, **kwargs): _kwargs[k] = v return _kwargs + + @classmethod + def _create_component(cls, d: dict): + filtered_keys = {} + not_allowed_keys = {} + allowed_keys = cls._ALLOWED_CREATE_KEYS + for k, v in d.items(): + if k in allowed_keys: + filtered_keys[k] = v + elif v is not None: + not_allowed_keys[k] = v + + if not_allowed_keys: + raise ValueError(f"Cannot create {cls.__name__} with {not_allowed_keys}. 
Allowed keys are: {allowed_keys}") + + return cls(**filtered_keys) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index b869a8846..21452dda4 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -9,7 +9,6 @@ from __future__ import annotations -import inspect from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Union @@ -157,8 +156,41 @@ def wave_frequency(self) -> Optional[float]: """Return the wave frequency in Hz of the 2D spectra parameter.""" pass + @classmethod + def from_dict(cls, d: dict) -> "ParameterBase": + """Create a parameter object from a dictionary. + + The appropriate subclass is determined automatically based on the dictionary contents. + + Parameters + ---------- + d : dict + Dictionary containing parameter data. + + The dictionary can contain the following keys: + + - "variable": The parameter variable. + - "standard_name": The standard name of the parameter variable. + - "long_name": The long name of the parameter variable. + - "units": The parameter units, as a string or a Units object. + - "chem": The chemical constituent or aerosol type of the parameter. + - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. + - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, + as an int or a 2-tuple of ints. + - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. -def create_parameter(d: dict) -> "ParameterBase": + Returns + ------- + ParameterBase + The created parameter instance. The actual type depends on the dictionary contents: + :class:`ChemicalOpticalParameter`, :class:`ChemicalParameter`, + :class:`OpticalParameter`, :class:`WaveSpectraParameter`, or :class:`Parameter`. 
+ """ + return create_parameter(d) + + +def create_parameter(d: dict) -> ParameterBase: """Create a ParameterBase object from a dictionary. The appropriate subclass is determined automatically based on the dictionary contents: @@ -182,8 +214,7 @@ def create_parameter(d: dict) -> "ParameterBase": if not isinstance(d, dict): raise TypeError(f"Cannot create Parameter from {type(d)}, expected dict") - cls = Parameter - d1 = cls._normalise_create_kwargs( + d1 = ParameterBase._normalise_create_kwargs( d, allowed_keys=( "variable", @@ -215,12 +246,7 @@ def create_parameter(d: dict) -> "ParameterBase": else: cls = Parameter - # Filter d1 to only include keys accepted by the chosen class's __init__ - sig = inspect.signature(cls.__init__) - valid_params = set(sig.parameters.keys()) - {"self"} - filtered = {k: v for k, v in d1.items() if k in valid_params} - - return cls(**filtered) + return cls._create_component(d1) class EmptyParameter(ParameterBase): @@ -394,39 +420,6 @@ def wave_frequency(self) -> None: """ return None - @classmethod - def from_dict(cls, d: dict) -> "ParameterBase": - """Create a parameter object from a dictionary. - - The appropriate subclass is determined automatically based on the dictionary contents. - - Parameters - ---------- - d : dict - Dictionary containing parameter data. - - The dictionary can contain the following keys: - - - "variable": The parameter variable. - - "standard_name": The standard name of the parameter variable. - - "long_name": The long name of the parameter variable. - - "units": The parameter units, as a string or a Units object. - - "chem": The chemical constituent or aerosol type of the parameter. - - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, - as an int or a 2-tuple of ints. - - "wave_direction": The wave direction in degrees of the 2D spectra parameter. 
- - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. - - Returns - ------- - ParameterBase - The created parameter instance. The actual type depends on the dictionary contents: - :class:`ChemicalOpticalParameter`, :class:`ChemicalParameter`, - :class:`OpticalParameter`, :class:`WaveSpectraParameter`, or :class:`Parameter`. - """ - return create_parameter(d) - def to_dict(self): """Return a dictionary representation of the parameter.""" return { From d0e25789dac700dc3d1ba938abf017b212d5f646 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 00:35:14 +0200 Subject: [PATCH 13/21] New metadata keys added to the parameter component of Field: wavelength_bounds, wave_direction_index, wave_direction_bounds, wave_frequency_index, wave_frequency_bounds --- .../data/field/component/parameter.py | 168 +++++++++++++++++- src/earthkit/data/field/grib/parameter.py | 124 +++++++++---- 2 files changed, 257 insertions(+), 35 deletions(-) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 21452dda4..5bc1aae47 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -42,8 +42,14 @@ class ParameterBase(SimpleFieldComponent): - "chem_long_name": string representing the long name of the parameter chemical constituent or aerosol type, or None - "wavelength": int representing the optical parameter wavelength in nanometers, or a 2-tuple of ints representing the wavelength range in nanometers, or None + - "wavelength_bounds": 2-tuple of ints representing the optical parameter wavelength bounds + in nanometers, or None - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None + - "wave_direction_index": int representing the 0-based index of the wave direction bin, or None + - "wave_direction_bounds": 2-tuple of floats representing the wave direction bounds in degrees, or None - 
"wave_frequency": float representing the wave frequency in Hz of the 2D spectra parameter, or None + - "wave_frequency_index": int representing the 0-based index of the wave frequency bin, or None + - "wave_frequency_bounds": 2-tuple of floats representing the wave frequency bounds in Hz, or None - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported @@ -144,18 +150,48 @@ def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: """Return the optical parameter wavelength or wavelength interval in nanometers.""" pass + @mark_get_key + @abstractmethod + def wavelength_bounds(self) -> Optional[tuple[int, int]]: + """Return the optical parameter wavelength bounds in nanometers.""" + pass + @mark_get_key @abstractmethod def wave_direction(self) -> Optional[float]: """Return the wave direction in degrees of the 2D spectra parameter.""" pass + @mark_get_key + @abstractmethod + def wave_direction_index(self) -> Optional[int]: + """Return the 0-based index of the wave direction bin.""" + pass + + @mark_get_key + @abstractmethod + def wave_direction_bounds(self) -> Optional[tuple[float, float]]: + """Return the wave direction bounds in degrees of the 2D spectra parameter.""" + pass + @mark_get_key @abstractmethod def wave_frequency(self) -> Optional[float]: """Return the wave frequency in Hz of the 2D spectra parameter.""" pass + @mark_get_key + @abstractmethod + def wave_frequency_index(self) -> Optional[int]: + """Return the 0-based index of the wave frequency bin.""" + pass + + @mark_get_key + @abstractmethod + def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: + """Return the wave frequency bounds in Hz of the 2D spectra parameter.""" + pass + @classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create a parameter object from a dictionary. 
@@ -214,7 +250,7 @@ def create_parameter(d: dict) -> ParameterBase: if not isinstance(d, dict): raise TypeError(f"Cannot create Parameter from {type(d)}, expected dict") - d1 = ParameterBase._normalise_create_kwargs( + d1 = Parameter._normalise_create_kwargs( d, allowed_keys=( "variable", @@ -224,8 +260,13 @@ def create_parameter(d: dict) -> ParameterBase: "standard_name", "long_name", "wavelength", + "wavelength_bounds", "wave_direction", + "wave_direction_index", + "wave_direction_bounds", "wave_frequency", + "wave_frequency_index", + "wave_frequency_bounds", ), ) if "variable" not in d1: @@ -301,6 +342,13 @@ def wavelength(self) -> None: """ return None + def wavelength_bounds(self) -> None: + r"""Return the optical parameter wavelength bounds in nanometers. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + def wave_direction(self) -> None: r"""Return the wave direction in degrees of the 2D spectra parameter. @@ -308,6 +356,20 @@ def wave_direction(self) -> None: """ return None + def wave_direction_index(self) -> None: + r"""Return the 0-based index of the wave direction bin. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_direction_bounds(self) -> None: + r"""Return the wave direction bounds in degrees of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + def wave_frequency(self) -> None: r"""Return the wave frequency in Hz of the 2D spectra parameter. @@ -315,6 +377,20 @@ def wave_frequency(self) -> None: """ return None + def wave_frequency_index(self) -> None: + r"""Return the 0-based index of the wave frequency bin. + + An EmptyParameter does not contain any parameter information, and this method returns None. 
+ """ + return None + + def wave_frequency_bounds(self) -> None: + r"""Return the wave frequency bounds in Hz of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + @classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create an EmptyParameter object from a dictionary.""" @@ -406,6 +482,13 @@ def wavelength(self) -> None: """ return None + def wavelength_bounds(self) -> None: + r"""Return the optical parameter wavelength bounds in nanometers. + + A regular Parameter does not have optical information, and this method returns None. + """ + return None + def wave_direction(self) -> None: r"""Return the wave direction in degrees of the 2D spectra parameter. @@ -413,6 +496,20 @@ def wave_direction(self) -> None: """ return None + def wave_direction_index(self) -> None: + r"""Return the 0-based index of the wave direction bin. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_direction_bounds(self) -> None: + r"""Return the wave direction bounds in degrees of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + def wave_frequency(self) -> None: r"""Return the wave frequency in Hz of the 2D spectra parameter. @@ -420,6 +517,20 @@ def wave_frequency(self) -> None: """ return None + def wave_frequency_index(self) -> None: + r"""Return the 0-based index of the wave frequency bin. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency_bounds(self) -> None: + r"""Return the wave frequency bounds in Hz of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. 
+ """ + return None + def to_dict(self): """Return a dictionary representation of the parameter.""" return { @@ -470,8 +581,13 @@ def set(self, *args, **kwargs): "standard_name", "long_name", "wavelength", + "wavelength_bounds", "wave_direction", + "wave_direction_index", + "wave_direction_bounds", "wave_frequency", + "wave_frequency_index", + "wave_frequency_bounds", ), **kwargs, ) @@ -552,6 +668,8 @@ class OpticalParameter(Parameter): The parameter units, by default None. Can be provided as a string or a Units object. wavelength : int or 2-tuple of ints, optional The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. + wavelength_bounds : 2-tuple of ints, optional + The optical parameter wavelength bounds in nanometers, by default None. """ def __init__( @@ -561,18 +679,25 @@ def __init__( long_name: str = None, units: Union[str, "Units"] = None, wavelength: Union[int, tuple[int, int]] = None, + wavelength_bounds: Optional[tuple[int, int]] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wavelength = wavelength + self._wavelength_bounds = wavelength_bounds def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: r"""Return the optical parameter wavelength or wavelength interval in nanometers.""" return self._wavelength + def wavelength_bounds(self) -> Optional[tuple[int, int]]: + r"""Return the optical parameter wavelength bounds in nanometers.""" + return self._wavelength_bounds + def to_dict(self): """Return a dictionary representation of the optical parameter.""" d = Parameter.to_dict(self) d["wavelength"] = self._wavelength + d["wavelength_bounds"] = self._wavelength_bounds return d @@ -599,6 +724,8 @@ class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter): The long name of the parameter chemical constituent or aerosol type, by default None. 
wavelength : int or 2-tuple of ints, optional The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. + wavelength_bounds : 2-tuple of ints, optional + The optical parameter wavelength bounds in nanometers, by default None. """ def __init__( @@ -610,11 +737,13 @@ def __init__( chem: str = None, chem_long_name: str = None, wavelength: Union[int, tuple[int, int]] = None, + wavelength_bounds: Optional[tuple[int, int]] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._chem = chem self._chem_long_name = chem_long_name self._wavelength = wavelength + self._wavelength_bounds = wavelength_bounds def to_dict(self): """Return a dictionary representation of the chemical-optical parameter.""" @@ -622,6 +751,7 @@ def to_dict(self): d["chem"] = self._chem d["chem_long_name"] = self._chem_long_name d["wavelength"] = self._wavelength + d["wavelength_bounds"] = self._wavelength_bounds return d @@ -643,8 +773,16 @@ class WaveSpectraParameter(Parameter): The parameter units, by default None. Can be provided as a string or a Units object. wave_direction : float, optional The wave direction in degrees of the 2D spectra parameter, by default None. + wave_direction_index : int, optional + The 0-based index of the wave direction bin, by default None. + wave_direction_bounds : 2-tuple of floats, optional + The wave direction bounds in degrees of the 2D spectra parameter, by default None. wave_frequency : float, optional The wave frequency in Hz of the 2D spectra parameter, by default None. + wave_frequency_index : int, optional + The 0-based index of the wave frequency bin, by default None. + wave_frequency_bounds : 2-tuple of floats, optional + The wave frequency bounds in Hz of the 2D spectra parameter, by default None. 
""" def __init__( @@ -654,23 +792,51 @@ def __init__( long_name: str = None, units: Union[str, "Units"] = None, wave_direction: float = None, + wave_direction_index: Optional[int] = None, + wave_direction_bounds: Optional[tuple[float, float]] = None, wave_frequency: float = None, + wave_frequency_index: Optional[int] = None, + wave_frequency_bounds: Optional[tuple[float, float]] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wave_direction = wave_direction + self._wave_direction_index = wave_direction_index + self._wave_direction_bounds = wave_direction_bounds self._wave_frequency = wave_frequency + self._wave_frequency_index = wave_frequency_index + self._wave_frequency_bounds = wave_frequency_bounds def wave_direction(self) -> Optional[float]: r"""Return the wave direction in degrees of the 2D spectra parameter.""" return self._wave_direction + def wave_direction_index(self) -> Optional[int]: + r"""Return the 0-based index of the wave direction bin.""" + return self._wave_direction_index + + def wave_direction_bounds(self) -> Optional[tuple[float, float]]: + r"""Return the wave direction bounds in degrees of the 2D spectra parameter.""" + return self._wave_direction_bounds + def wave_frequency(self) -> Optional[float]: r"""Return the wave frequency in Hz of the 2D spectra parameter.""" return self._wave_frequency + def wave_frequency_index(self) -> Optional[int]: + r"""Return the 0-based index of the wave frequency bin.""" + return self._wave_frequency_index + + def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: + r"""Return the wave frequency bounds in Hz of the 2D spectra parameter.""" + return self._wave_frequency_bounds + def to_dict(self): """Return a dictionary representation of the wave spectra parameter.""" d = Parameter.to_dict(self) d["wave_direction"] = self._wave_direction + d["wave_direction_index"] = self._wave_direction_index + 
d["wave_direction_bounds"] = self._wave_direction_bounds d["wave_frequency"] = self._wave_frequency + d["wave_frequency_index"] = self._wave_frequency_index + d["wave_frequency_bounds"] = self._wave_frequency_bounds return d diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index e73d1ac8c..a81228e4f 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -7,6 +7,9 @@ # nor does it submit to any jurisdiction. # +from earthkit.data.field.component.parameter import create_parameter +from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler + from .collector import GribContextCollector from .core import GribFieldComponentHandler @@ -25,9 +28,6 @@ class GribParameterBuilder: @staticmethod def build(handle): - from earthkit.data.field.component.parameter import create_parameter - from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler - d = GribParameterBuilder._build_dict(handle) component = create_parameter(d) handler = ParameterFieldComponentHandler.from_component(component) @@ -38,6 +38,7 @@ def _build_dict(handle): def _get(key, default=None): return handle.get(key, default=default) + # Core metadata keys for identifying the parameter v = _get("shortName", None) if v == "~": v = handle.get("paramId", ktype=str, default=None) @@ -49,15 +50,32 @@ def _get(key, default=None): units = _get("units", None) - chem_name = _get("parameter.chemShortName", None) - # using "parameter.chemShortName" instead of "chemShortName" avoids getting "unknown" if this key is not defined - # cf. 
https://github.com/ecmwf/eccodes/blob/eac2eb507b5b44fcc3d3c58e382efde3a274b1c4/definitions/grib2/parameters.def#L29 + d = dict( + variable=variable, + standard_name=standard_name, + long_name=long_name, + units=units, + ) + + # Metadata for chemical parameters + if _get("chemId", None) is not None: + # "chemId" is defined for chemical parameters + chem = _get("parameter.chemShortName", None) + # using "parameter.chemShortName" instead of "chemShortName" + # avoids getting "unknown" if this key is not defined + # cf. https://github.com/ecmwf/eccodes/blob/eac2eb507b5b44fcc3d3c58e38/definitions/grib2/parameters.def#L29 - chem_long_name = _get("chemName", None) - if chem_long_name == "unknown": - chem_long_name = None + chem_long_name = _get("chemName", None) + if chem_long_name == "unknown": + chem_long_name = None + d["chem"] = chem + d["chem_long_name"] = chem_long_name + + # Metadata for optical parameters _wavelength = _get("mars.wavelength", None) + wavelength = None + wavelength_bounds = None # The logic below follows the "mars.wavelength" key definition: # https://github.com/ecmwf/eccodes/blob/develop/definitions/mars/mars.wavelength.def if isinstance(_wavelength, (int, float)): @@ -66,11 +84,14 @@ def _get(key, default=None): # expected format is "-" try: wlen1, wlen2 = _wavelength.split("-") - wavelength = round(float(wlen1)), round(float(wlen2)) + wavelength_bounds = round(float(wlen1)), round(float(wlen2)) + wavelength = round((wavelength_bounds[1] + wavelength_bounds[0]) / 2) except Exception: - wavelength = None - else: - wavelength = None + pass + + if wavelength is not None: + d["wavelength"] = wavelength + d["wavelength_bounds"] = wavelength_bounds _grib_edition = _get("edition", None) @@ -81,35 +102,70 @@ def _scale_value(v, scaling_factor): return float(v * 10 ** (-scaling_factor)) raise ValueError(f"Unsupported GRIB edition: {_grib_edition}") - # Wave direction + # 2D wave spectra: direction try: - scaled_directions = _get("scaledDirections", None) 
direction_number = _get("directionNumber", None) - direction_scaling_factor = _get("directionScalingFactor", None) - wave_direction = _scale_value(scaled_directions[direction_number - 1], direction_scaling_factor) + if direction_number is not None: + direction_index = direction_number - 1 # convert to 0-based index + number_of_directions = _get("numberOfDirections", None) + direction_scaling_factor = _get("directionScalingFactor", None) + scaled_directions = _get("scaledDirections", None) + wave_direction = _scale_value(scaled_directions[direction_index], direction_scaling_factor) + + d["wave_direction"] = wave_direction + d["wave_direction_index"] = direction_index + + # wave direction bounds + if number_of_directions > 1: + if direction_index > 0: + prev_wave_direction = _scale_value( + scaled_directions[direction_index - 1], direction_scaling_factor + ) + delta = (wave_direction - prev_wave_direction) / 2 + else: + next_wave_direction = _scale_value( + scaled_directions[direction_index + 1], direction_scaling_factor + ) + delta = (next_wave_direction - wave_direction) / 2 + d["wave_direction_bounds"] = (wave_direction - delta, wave_direction + delta) + else: + d["wave_direction_bounds"] = None except Exception: - wave_direction = None + pass - # Wave frequency + # 2D wave spectra: frequency try: - scaled_frequencies = _get("scaledFrequencies", None) frequency_number = _get("frequencyNumber", None) - frequency_scaling_factor = _get("frequencyScalingFactor", None) - wave_frequency = _scale_value(scaled_frequencies[frequency_number - 1], frequency_scaling_factor) + if frequency_number is not None: + frequency_index = frequency_number - 1 # convert to 0-based index + number_of_frequencies = _get("numberOfFrequencies", None) + frequency_scaling_factor = _get("frequencyScalingFactor", None) + scaled_frequencies = _get("scaledFrequencies", None) + wave_frequency = _scale_value(scaled_frequencies[frequency_index], frequency_scaling_factor) + + d["wave_frequency"] = 
wave_frequency + d["wave_frequency_index"] = frequency_index + + # wave frequency bounds: frequencies are equally spaced on the log scale + if number_of_frequencies > 1: + if frequency_index > 0: + prev_wave_frequency = _scale_value( + scaled_frequencies[frequency_index - 1], frequency_scaling_factor + ) + factor = (wave_frequency / prev_wave_frequency) ** 0.5 + else: + next_wave_frequency = _scale_value( + scaled_frequencies[frequency_index + 1], frequency_scaling_factor + ) + factor = (next_wave_frequency / wave_frequency) ** 0.5 + d["wave_frequency_bounds"] = (round(wave_frequency / factor, 6), round(wave_frequency * factor, 6)) + else: + d["wave_frequency_bounds"] = None + except Exception: - wave_frequency = None + pass - return dict( - variable=variable, - standard_name=standard_name, - long_name=long_name, - units=units, - chem=chem_name, - chem_long_name=chem_long_name, - wavelength=wavelength, - wave_direction=wave_direction, - wave_frequency=wave_frequency, - ) + return d class GribParameterContextCollector(GribContextCollector): From b271c5dc996cc371ae7ea97ac5c23dd2e1cb29e0 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 00:40:39 +0200 Subject: [PATCH 14/21] Docstrings for set() and from_dict() for parameter component updated with 5 new metadata keys --- src/earthkit/data/field/component/parameter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 5bc1aae47..824c51f36 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -213,8 +213,13 @@ def from_dict(cls, d: dict) -> "ParameterBase": - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, as an int or a 2-tuple of ints. 
+ - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_direction_index": The 0-based index of the wave direction bin. + - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. + - "wave_frequency_index": The 0-based index of the wave frequency bin. + - "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. Returns ------- @@ -568,8 +573,13 @@ def set(self, *args, **kwargs): - "chem": The chemical constituent or aerosol type of the parameter. - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers. + - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_direction_index": The 0-based index of the wave direction bin. + - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. + - "wave_frequency_index": The 0-based index of the wave frequency bin. + - "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. 
""" d = self._normalise_set_kwargs( *args, From cef88559709f5dbd370880701d4dfedccfad194c Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 01:45:15 +0200 Subject: [PATCH 15/21] The keys added: wavelength_units, wave_direction_units, wave_frequency_units Tests updated --- .../data/field/component/parameter.py | 145 +++++++++-- src/earthkit/data/field/grib/parameter.py | 9 + tests/field/test_parameter_component.py | 233 +++++++++++++++++- 3 files changed, 361 insertions(+), 26 deletions(-) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 824c51f36..4c6273fe3 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -40,16 +40,18 @@ class ParameterBase(SimpleFieldComponent): - "units": as a string or a :class:`Units` object representing the parameter units - "chem": string representing the parameter chemical constituent or aerosol type, or None - "chem_long_name": string representing the long name of the parameter chemical constituent or aerosol type, or None - - "wavelength": int representing the optical parameter wavelength in nanometers, - or a 2-tuple of ints representing the wavelength range in nanometers, or None + - "wavelength": int representing the optical parameter wavelength in nanometers, or None - "wavelength_bounds": 2-tuple of ints representing the optical parameter wavelength bounds in nanometers, or None + - "wavelength_units": :class:`Units` object representing the wavelength units (e.g. nanometers), or None - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None - "wave_direction_index": int representing the 0-based index of the wave direction bin, or None - "wave_direction_bounds": 2-tuple of floats representing the wave direction bounds in degrees, or None + - "wave_direction_units": :class:`Units` object representing the wave direction units (e.g. 
degrees), or None - "wave_frequency": float representing the wave frequency in Hz of the 2D spectra parameter, or None - "wave_frequency_index": int representing the 0-based index of the wave frequency bin, or None - "wave_frequency_bounds": 2-tuple of floats representing the wave frequency bounds in Hz, or None + - "wave_frequency_units": :class:`Units` object representing the wave frequency units (e.g. 1/s), or None - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported @@ -146,8 +148,8 @@ def long_name(self) -> Optional[str]: @mark_get_key @abstractmethod - def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: - """Return the optical parameter wavelength or wavelength interval in nanometers.""" + def wavelength(self) -> Optional[int]: + """Return the optical parameter wavelength in nanometers.""" pass @mark_get_key @@ -156,6 +158,12 @@ def wavelength_bounds(self) -> Optional[tuple[int, int]]: """Return the optical parameter wavelength bounds in nanometers.""" pass + @mark_get_key + @abstractmethod + def wavelength_units(self) -> Optional["Units"]: + """Return the units of the optical parameter wavelength.""" + pass + @mark_get_key @abstractmethod def wave_direction(self) -> Optional[float]: @@ -174,6 +182,12 @@ def wave_direction_bounds(self) -> Optional[tuple[float, float]]: """Return the wave direction bounds in degrees of the 2D spectra parameter.""" pass + @mark_get_key + @abstractmethod + def wave_direction_units(self) -> Optional["Units"]: + """Return the units of the wave direction.""" + pass + @mark_get_key @abstractmethod def wave_frequency(self) -> Optional[float]: @@ -192,6 +206,12 @@ def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: """Return the wave frequency bounds in Hz of the 2D spectra parameter.""" pass + @mark_get_key + @abstractmethod + def wave_frequency_units(self) -> Optional["Units"]: + """Return the units of the wave frequency.""" + pass + 
@classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create a parameter object from a dictionary. @@ -211,15 +231,17 @@ def from_dict(cls, d: dict) -> "ParameterBase": - "units": The parameter units, as a string or a Units object. - "chem": The chemical constituent or aerosol type of the parameter. - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers, - as an int or a 2-tuple of ints. + - "wavelength": The optical parameter wavelength in nanometers as an int. - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. + - "wavelength_units": The wavelength units, as a string or a Units object. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. - "wave_direction_index": The 0-based index of the wave direction bin. - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. + - "wave_direction_units": The wave direction units, as a string or a Units object. - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. - "wave_frequency_index": The 0-based index of the wave frequency bin. - "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. + - "wave_frequency_units": The wave frequency units, as a string or a Units object. Returns ------- @@ -266,12 +288,15 @@ def create_parameter(d: dict) -> ParameterBase: "long_name", "wavelength", "wavelength_bounds", + "wavelength_units", "wave_direction", "wave_direction_index", "wave_direction_bounds", + "wave_direction_units", "wave_frequency", "wave_frequency_index", "wave_frequency_bounds", + "wave_frequency_units", ), ) if "variable" not in d1: @@ -341,7 +366,7 @@ def chem_long_name(self) -> None: return None def wavelength(self) -> None: - r"""Return the optical parameter wavelength or wavelength interval in nanometers. 
+ r"""Return the optical parameter wavelength in nanometers. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -354,6 +379,13 @@ def wavelength_bounds(self) -> None: """ return None + def wavelength_units(self) -> None: + r"""Return the units of the optical parameter wavelength. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + def wave_direction(self) -> None: r"""Return the wave direction in degrees of the 2D spectra parameter. @@ -375,6 +407,13 @@ def wave_direction_bounds(self) -> None: """ return None + def wave_direction_units(self) -> None: + r"""Return the units of the wave direction. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + def wave_frequency(self) -> None: r"""Return the wave frequency in Hz of the 2D spectra parameter. @@ -396,6 +435,13 @@ def wave_frequency_bounds(self) -> None: """ return None + def wave_frequency_units(self) -> None: + r"""Return the units of the wave frequency. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + @classmethod def from_dict(cls, d: dict) -> "ParameterBase": """Create an EmptyParameter object from a dictionary.""" @@ -481,7 +527,7 @@ def chem_long_name(self) -> None: return None def wavelength(self) -> None: - r"""Return the optical parameter wavelength or wavelength interval in nanometers. + r"""Return the optical parameter wavelength in nanometers. A regular Parameter does not have optical information, and this method returns None. """ @@ -494,6 +540,13 @@ def wavelength_bounds(self) -> None: """ return None + def wavelength_units(self) -> None: + r"""Return the units of the optical parameter wavelength. + + A regular Parameter does not have optical information, and this method returns None. 
+ """ + return None + def wave_direction(self) -> None: r"""Return the wave direction in degrees of the 2D spectra parameter. @@ -515,6 +568,13 @@ def wave_direction_bounds(self) -> None: """ return None + def wave_direction_units(self) -> None: + r"""Return the units of the wave direction. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + def wave_frequency(self) -> None: r"""Return the wave frequency in Hz of the 2D spectra parameter. @@ -536,6 +596,13 @@ def wave_frequency_bounds(self) -> None: """ return None + def wave_frequency_units(self) -> None: + r"""Return the units of the wave frequency. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + def to_dict(self): """Return a dictionary representation of the parameter.""" return { @@ -572,14 +639,17 @@ def set(self, *args, **kwargs): - "long_name": The long name of the parameter variable. - "chem": The chemical constituent or aerosol type of the parameter. - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - - "wavelength": The optical parameter wavelength in nanometers, or a wavelength range in nanometers. + - "wavelength": The optical parameter wavelength in nanometers. - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. + - "wavelength_units": The wavelength units, as a string or a Units object. - "wave_direction": The wave direction in degrees of the 2D spectra parameter. - "wave_direction_index": The 0-based index of the wave direction bin. - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. + - "wave_direction_units": The wave direction units, as a string or a Units object. - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. - "wave_frequency_index": The 0-based index of the wave frequency bin. 
- "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. + - "wave_frequency_units": The wave frequency units, as a string or a Units object. """ d = self._normalise_set_kwargs( *args, @@ -592,12 +662,15 @@ def set(self, *args, **kwargs): "long_name", "wavelength", "wavelength_bounds", + "wavelength_units", "wave_direction", "wave_direction_index", "wave_direction_bounds", + "wave_direction_units", "wave_frequency", "wave_frequency_index", "wave_frequency_bounds", + "wave_frequency_units", ), **kwargs, ) @@ -662,8 +735,8 @@ def to_dict(self): class OpticalParameter(Parameter): """Parameter component representing an optical parameter. - An optical parameter includes a wavelength or wavelength range but no chemical - constituent. For parameters that have both chemical and optical properties, use + An optical parameter includes a wavelength but no chemical constituent. + For parameters that have both chemical and optical properties, use :class:`ChemicalOpticalParameter`. Parameters @@ -676,10 +749,12 @@ class OpticalParameter(Parameter): The long name of the parameter variable, by default None. units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. - wavelength : int or 2-tuple of ints, optional - The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. + wavelength : int, optional + The optical parameter wavelength in nanometers, by default None. wavelength_bounds : 2-tuple of ints, optional The optical parameter wavelength bounds in nanometers, by default None. + wavelength_units : str or Units, optional + The wavelength units, by default None. Can be provided as a string or a Units object. 
""" def __init__( @@ -688,26 +763,33 @@ def __init__( standard_name: str = None, long_name: str = None, units: Union[str, "Units"] = None, - wavelength: Union[int, tuple[int, int]] = None, + wavelength: Optional[int] = None, wavelength_bounds: Optional[tuple[int, int]] = None, + wavelength_units: Union[str, "Units"] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wavelength = wavelength self._wavelength_bounds = wavelength_bounds + self._wavelength_units = Units.from_any(wavelength_units) - def wavelength(self) -> Optional[Union[int, tuple[int, int]]]: - r"""Return the optical parameter wavelength or wavelength interval in nanometers.""" + def wavelength(self) -> Optional[int]: + r"""Return the optical parameter wavelength in nanometers.""" return self._wavelength def wavelength_bounds(self) -> Optional[tuple[int, int]]: r"""Return the optical parameter wavelength bounds in nanometers.""" return self._wavelength_bounds + def wavelength_units(self) -> Optional["Units"]: + r"""Return the units of the optical parameter wavelength.""" + return self._wavelength_units + def to_dict(self): """Return a dictionary representation of the optical parameter.""" d = Parameter.to_dict(self) d["wavelength"] = self._wavelength d["wavelength_bounds"] = self._wavelength_bounds + d["wavelength_units"] = str(self._wavelength_units) return d @@ -715,7 +797,7 @@ class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter): """Parameter component representing a chemical-optical parameter. A chemical-optical parameter includes both a chemical constituent or aerosol type - and an optical wavelength or wavelength range. It inherits chemical properties from + and an optical wavelength. It inherits chemical properties from :class:`ChemicalParameter` and optical properties from :class:`OpticalParameter`. 
Parameters @@ -732,10 +814,12 @@ class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter): The parameter chemical constituent or aerosol type, by default None. chem_long_name : str, optional The long name of the parameter chemical constituent or aerosol type, by default None. - wavelength : int or 2-tuple of ints, optional - The optical parameter wavelength in nanometers, or a wavelength range in nanometers, by default None. + wavelength : int, optional + The optical parameter wavelength in nanometers, by default None. wavelength_bounds : 2-tuple of ints, optional The optical parameter wavelength bounds in nanometers, by default None. + wavelength_units : str or Units, optional + The wavelength units, by default None. Can be provided as a string or a Units object. """ def __init__( @@ -746,14 +830,16 @@ def __init__( units: Union[str, "Units"] = None, chem: str = None, chem_long_name: str = None, - wavelength: Union[int, tuple[int, int]] = None, + wavelength: Optional[int] = None, wavelength_bounds: Optional[tuple[int, int]] = None, + wavelength_units: Union[str, "Units"] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._chem = chem self._chem_long_name = chem_long_name self._wavelength = wavelength self._wavelength_bounds = wavelength_bounds + self._wavelength_units = Units.from_any(wavelength_units) def to_dict(self): """Return a dictionary representation of the chemical-optical parameter.""" @@ -762,6 +848,7 @@ def to_dict(self): d["chem_long_name"] = self._chem_long_name d["wavelength"] = self._wavelength d["wavelength_bounds"] = self._wavelength_bounds + d["wavelength_units"] = str(self._wavelength_units) return d @@ -787,12 +874,16 @@ class WaveSpectraParameter(Parameter): The 0-based index of the wave direction bin, by default None. wave_direction_bounds : 2-tuple of floats, optional The wave direction bounds in degrees of the 2D spectra parameter, by default None. 
+ wave_direction_units : str or Units, optional + The wave direction units, by default None. Can be provided as a string or a Units object. wave_frequency : float, optional The wave frequency in Hz of the 2D spectra parameter, by default None. wave_frequency_index : int, optional The 0-based index of the wave frequency bin, by default None. wave_frequency_bounds : 2-tuple of floats, optional The wave frequency bounds in Hz of the 2D spectra parameter, by default None. + wave_frequency_units : str or Units, optional + The wave frequency units, by default None. Can be provided as a string or a Units object. """ def __init__( @@ -804,17 +895,21 @@ def __init__( wave_direction: float = None, wave_direction_index: Optional[int] = None, wave_direction_bounds: Optional[tuple[float, float]] = None, + wave_direction_units: Union[str, "Units"] = None, wave_frequency: float = None, wave_frequency_index: Optional[int] = None, wave_frequency_bounds: Optional[tuple[float, float]] = None, + wave_frequency_units: Union[str, "Units"] = None, ) -> None: Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wave_direction = wave_direction self._wave_direction_index = wave_direction_index self._wave_direction_bounds = wave_direction_bounds + self._wave_direction_units = Units.from_any(wave_direction_units) self._wave_frequency = wave_frequency self._wave_frequency_index = wave_frequency_index self._wave_frequency_bounds = wave_frequency_bounds + self._wave_frequency_units = Units.from_any(wave_frequency_units) def wave_direction(self) -> Optional[float]: r"""Return the wave direction in degrees of the 2D spectra parameter.""" @@ -828,6 +923,10 @@ def wave_direction_bounds(self) -> Optional[tuple[float, float]]: r"""Return the wave direction bounds in degrees of the 2D spectra parameter.""" return self._wave_direction_bounds + def wave_direction_units(self) -> Optional["Units"]: + r"""Return the units of the wave direction.""" 
+ return self._wave_direction_units + def wave_frequency(self) -> Optional[float]: r"""Return the wave frequency in Hz of the 2D spectra parameter.""" return self._wave_frequency @@ -840,13 +939,19 @@ def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: r"""Return the wave frequency bounds in Hz of the 2D spectra parameter.""" return self._wave_frequency_bounds + def wave_frequency_units(self) -> Optional["Units"]: + r"""Return the units of the wave frequency.""" + return self._wave_frequency_units + def to_dict(self): """Return a dictionary representation of the wave spectra parameter.""" d = Parameter.to_dict(self) d["wave_direction"] = self._wave_direction d["wave_direction_index"] = self._wave_direction_index d["wave_direction_bounds"] = self._wave_direction_bounds + d["wave_direction_units"] = str(self._wave_direction_units) d["wave_frequency"] = self._wave_frequency d["wave_frequency_index"] = self._wave_frequency_index d["wave_frequency_bounds"] = self._wave_frequency_bounds + d["wave_frequency_units"] = str(self._wave_frequency_units) return d diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index a81228e4f..e69d5a633 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -92,6 +92,7 @@ def _get(key, default=None): if wavelength is not None: d["wavelength"] = wavelength d["wavelength_bounds"] = wavelength_bounds + d["wavelength_units"] = "nm" _grib_edition = _get("edition", None) @@ -114,6 +115,7 @@ def _scale_value(v, scaling_factor): d["wave_direction"] = wave_direction d["wave_direction_index"] = direction_index + d["wave_direction_units"] = "degree" # wave direction bounds if number_of_directions > 1: @@ -145,6 +147,7 @@ def _scale_value(v, scaling_factor): d["wave_frequency"] = wave_frequency d["wave_frequency_index"] = frequency_index + d["wave_frequency_units"] = "s ** -1" # wave frequency bounds: frequencies are equally spaced on the log 
scale if number_of_frequencies > 1: @@ -169,6 +172,12 @@ def _scale_value(v, scaling_factor): class GribParameterContextCollector(GribContextCollector): + """Collector for extracting GRIB context keys from parameter components. + + Collects the "shortName" key from the parameter component's variable for use + in GRIB encoding context. + """ + @staticmethod def collect_keys(handler, context): component = handler.component diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index 4e071c111..977b56156 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -31,8 +31,16 @@ def test_parameter_component_alias_1(): assert r.chem() is None assert r.chem_long_name() is None assert r.wavelength() is None + assert r.wavelength_bounds() is None + assert r.wavelength_units() is None assert r.wave_direction() is None + assert r.wave_direction_index() is None + assert r.wave_direction_bounds() is None + assert r.wave_direction_units() is None assert r.wave_frequency() is None + assert r.wave_frequency_index() is None + assert r.wave_frequency_bounds() is None + assert r.wave_frequency_units() is None @pytest.mark.parametrize( @@ -61,8 +69,16 @@ def test_parameter_component_alias_1(): "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, + "wavelength_bounds": None, + "wavelength_units": "nm", "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, }, { "variable": "aod", @@ -73,8 +89,16 @@ def test_parameter_component_alias_1(): "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, + "wavelength_bounds": None, + "wavelength_units": "nm", "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, 
"wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, }, ), ], @@ -96,8 +120,25 @@ def test_parameter_component_from_dict_ok(input_d, ref): assert r.chem() == ref.get("chem", None) assert r.chem_long_name() == ref.get("chem_long_name", None) assert r.wavelength() == ref.get("wavelength", None) + assert r.wavelength_bounds() == ref.get("wavelength_bounds", None) + if ref.get("wavelength_units") is not None: + assert r.wavelength_units() == ref["wavelength_units"] + else: + assert r.wavelength_units() is None assert r.wave_direction() == ref.get("wave_direction", None) + assert r.wave_direction_index() == ref.get("wave_direction_index", None) + assert r.wave_direction_bounds() == ref.get("wave_direction_bounds", None) + if ref.get("wave_direction_units") is not None: + assert r.wave_direction_units() == ref["wave_direction_units"] + else: + assert r.wave_direction_units() is None assert r.wave_frequency() == ref.get("wave_frequency", None) + assert r.wave_frequency_index() == ref.get("wave_frequency_index", None) + assert r.wave_frequency_bounds() == ref.get("wave_frequency_bounds", None) + if ref.get("wave_frequency_units") is not None: + assert r.wave_frequency_units() == ref["wave_frequency_units"] + else: + assert r.wave_frequency_units() is None @pytest.mark.parametrize( @@ -123,8 +164,16 @@ def test_parameter_component_from_dict_ok(input_d, ref): "chem": None, "chem_long_name": None, "wavelength": None, + "wavelength_bounds": None, + "wavelength_units": None, "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, }, ), ( @@ -137,6 +186,8 @@ def test_parameter_component_from_dict_ok(input_d, ref): "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, + "wavelength_bounds": (400, 700), + 
"wavelength_units": "nm", } ], { @@ -148,8 +199,16 @@ def test_parameter_component_from_dict_ok(input_d, ref): "chem": "aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, }, ), ( @@ -160,7 +219,13 @@ def test_parameter_component_from_dict_ok(input_d, ref): "standard_name": "unknown", "long_name": "2D wave spectra (single)", "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", } ], { @@ -172,8 +237,16 @@ def test_parameter_component_from_dict_ok(input_d, ref): "chem": None, "chem_long_name": None, "wavelength": None, + "wavelength_bounds": None, + "wavelength_units": None, "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", }, ), ], @@ -190,7 +263,10 @@ def test_parameter_component_set(input_d, ref): for k, v in ref.items(): rv = getattr(r1, k)() - assert rv == v, f"key {k} expected {v} got {rv}" + if v is None: + assert rv is None, f"key {k} expected None got {rv}" + else: + assert rv == v, f"key {k} expected {v} got {rv}" # the original object is unchanged assert r.variable() == "p" @@ -211,8 +287,16 @@ def test_parameter_component_create_parameter_regular(): assert p.units() == "K" assert p.chem() is None assert p.wavelength() is None + assert p.wavelength_bounds() is None + assert p.wavelength_units() is None assert p.wave_direction() is None 
+ assert p.wave_direction_index() is None + assert p.wave_direction_bounds() is None + assert p.wave_direction_units() is None assert p.wave_frequency() is None + assert p.wave_frequency_index() is None + assert p.wave_frequency_bounds() is None + assert p.wave_frequency_units() is None def test_parameter_component_create_parameter_chemical(): @@ -223,16 +307,40 @@ def test_parameter_component_create_parameter_chemical(): assert p.chem() == "carbon_monoxide" assert p.chem_long_name() == "CO" assert p.wavelength() is None + assert p.wavelength_bounds() is None + assert p.wavelength_units() is None assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None def test_parameter_component_create_parameter_optical(): """Test create_parameter returns an OpticalParameter for optical parameters.""" - p = create_parameter({"variable": "aod", "units": "Numeric", "wavelength": 550}) + p = create_parameter({"variable": "aod", "units": "Numeric", "wavelength": 550, "wavelength_units": "nm"}) assert isinstance(p, OpticalParameter) assert p.variable() == "aod" assert p.wavelength() == 550 + assert p.wavelength_bounds() is None + assert p.wavelength_units() == "nm" assert p.chem() is None + assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None + + +def test_parameter_component_create_parameter_optical_with_bounds(): + """Test create_parameter returns an OpticalParameter with wavelength_bounds.""" + p = create_parameter({ + "variable": "aod", + "units": "Numeric", + "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", + }) + assert isinstance(p, OpticalParameter) + assert p.wavelength() == 550 + assert p.wavelength_bounds() == (400, 700) + assert p.wavelength_units() == "nm" def test_parameter_component_create_parameter_chemical_optical(): @@ -243,13 +351,19 @@ def test_parameter_component_create_parameter_chemical_optical(): "chem": 
"aer_total", "chem_long_name": "Total aerosol", "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", }) assert isinstance(p, ChemicalOpticalParameter) assert p.variable() == "aod" assert p.chem() == "aer_total" assert p.chem_long_name() == "Total aerosol" assert p.wavelength() == 550 + assert p.wavelength_bounds() == (400, 700) + assert p.wavelength_units() == "nm" assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None def test_parameter_component_create_parameter_wave_spectra(): @@ -258,14 +372,27 @@ def test_parameter_component_create_parameter_wave_spectra(): "variable": "2dfd", "units": "m**2 s / rad", "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", }) assert isinstance(p, WaveSpectraParameter) assert p.variable() == "2dfd" assert p.wave_direction() == 5.0 + assert p.wave_direction_index() == 0 + assert p.wave_direction_bounds() == (0.0, 7.5) + assert p.wave_direction_units() == "degrees" assert p.wave_frequency() == 0.034523 + assert p.wave_frequency_index() == 1 + assert p.wave_frequency_bounds() == (0.03, 0.04) + assert p.wave_frequency_units() == "s-1" assert p.chem() is None assert p.wavelength() is None + assert p.wavelength_units() is None def test_parameter_component_set_changes_type(): @@ -279,18 +406,24 @@ def test_parameter_component_set_changes_type(): assert p2.chem() == "carbon_monoxide" # Add wavelength to chem -> becomes ChemicalOpticalParameter - p3 = p2.set(wavelength=550) + p3 = p2.set(wavelength=550, wavelength_units="nm") assert isinstance(p3, ChemicalOpticalParameter) assert p3.chem() == "carbon_monoxide" assert p3.wavelength() == 550 + assert p3.wavelength_units() == "nm" def test_parameter_component_inheritance(): """Test that subclasses 
have the correct inheritance relationships.""" cp = ChemicalParameter(variable="co", chem="co") - op = OpticalParameter(variable="aod", wavelength=550) - cop = ChemicalOpticalParameter(variable="aod", chem="aer", wavelength=550) - wp = WaveSpectraParameter(variable="2dfd", wave_direction=5.0) + op = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + cop = ChemicalOpticalParameter(variable="aod", chem="aer", wavelength=550, wavelength_units="nm") + wp = WaveSpectraParameter( + variable="2dfd", + wave_direction=5.0, + wave_direction_units="degrees", + wave_frequency_units="s-1", + ) # All are instances of Parameter assert isinstance(cp, Parameter) @@ -307,3 +440,91 @@ def test_parameter_component_inheritance(): assert not isinstance(op, ChemicalParameter) assert not isinstance(wp, ChemicalParameter) assert not isinstance(wp, OpticalParameter) + + +def test_parameter_component_optical_units(): + """Test that OpticalParameter stores wavelength_units as a Units object.""" + p = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + assert p.wavelength_units() == "nm" + assert p.wavelength_units() == "nanometer" + + # get() access + assert p.get("wavelength_units") == "nm" + + +def test_parameter_component_wave_spectra_units(): + """Test that WaveSpectraParameter stores wave direction and frequency units.""" + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=15.0, + wave_direction_index=1, + wave_direction_bounds=(7.5, 22.5), + wave_direction_units="degrees", + wave_frequency=0.05, + wave_frequency_index=2, + wave_frequency_bounds=(0.04, 0.06), + wave_frequency_units="s-1", + ) + assert p.wave_direction_units() == "degrees" + assert p.wave_frequency_units() == "s-1" + assert p.wave_frequency_units() == "1 / second" + + # get() access + assert p.get("wave_direction_units") == "degrees" + assert p.get("wave_frequency_units") == "s-1" + + +def test_parameter_component_to_dict_optical(): + """Test to_dict includes 
wavelength_units for OpticalParameter.""" + p = OpticalParameter( + variable="aod", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + d = p.to_dict() + assert d["wavelength"] == 550 + assert d["wavelength_bounds"] == (400, 700) + assert d["wavelength_units"] == "nanometer" + + +def test_parameter_component_to_dict_wave_spectra(): + """Test to_dict includes direction/frequency units for WaveSpectraParameter.""" + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=15.0, + wave_direction_index=1, + wave_direction_bounds=(7.5, 22.5), + wave_direction_units="degrees", + wave_frequency=0.05, + wave_frequency_index=2, + wave_frequency_bounds=(0.04, 0.06), + wave_frequency_units="s-1", + ) + d = p.to_dict() + assert d["wave_direction"] == 15.0 + assert d["wave_direction_index"] == 1 + assert d["wave_direction_bounds"] == (7.5, 22.5) + assert d["wave_direction_units"] == "degree" + assert d["wave_frequency"] == 0.05 + assert d["wave_frequency_index"] == 2 + assert d["wave_frequency_bounds"] == (0.04, 0.06) + assert d["wave_frequency_units"] == "1 / second" + + +def test_parameter_component_to_dict_chemical_optical(): + """Test to_dict includes wavelength_units for ChemicalOpticalParameter.""" + p = ChemicalOpticalParameter( + variable="aod", + chem="aer_total", + chem_long_name="Total aerosol", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + d = p.to_dict() + assert d["chem"] == "aer_total" + assert d["chem_long_name"] == "Total aerosol" + assert d["wavelength"] == 550 + assert d["wavelength_bounds"] == (400, 700) + assert d["wavelength_units"] == "nanometer" From ab398f524706ec41492086526fa25a0a60bae67e Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 10:10:47 +0200 Subject: [PATCH 16/21] wavelength, wave_direction, wave_frequency and their bounds accept optional argument `units` earthkit-utils>=1.0.0rc2 Tests updated --- pyproject.toml | 2 +- .../data/field/component/parameter.py | 
246 +++++++++++++----- tests/field/test_parameter_component.py | 124 +++++++++ tests/grib/test_grib_parameter.py | 114 ++++++++ 4 files changed, 417 insertions(+), 69 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7f2c6d5c..5dddd3b22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "cfgrib>=0.9.10.1", "dask", "deprecation", - "earthkit-utils>=1.0.0rc0", + "earthkit-utils>=1.0.0rc2", "eccodeslib==2.46.2.17", "eckit==2.0.6.17", "entrypoints", diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 4c6273fe3..c42021fcd 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -12,7 +12,7 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Union -from earthkit.utils.units import Units +from earthkit.utils.units import Units, convert_array from .component import SimpleFieldComponent, component_keys, mark_alias, mark_get_key @@ -40,17 +40,16 @@ class ParameterBase(SimpleFieldComponent): - "units": as a string or a :class:`Units` object representing the parameter units - "chem": string representing the parameter chemical constituent or aerosol type, or None - "chem_long_name": string representing the long name of the parameter chemical constituent or aerosol type, or None - - "wavelength": int representing the optical parameter wavelength in nanometers, or None - - "wavelength_bounds": 2-tuple of ints representing the optical parameter wavelength bounds - in nanometers, or None + - "wavelength": int or float representing the optical parameter wavelength, or None + - "wavelength_bounds": 2-tuple of ints or floats representing the optical parameter wavelength bounds, or None - "wavelength_units": :class:`Units` object representing the wavelength units (e.g. 
nanometers), or None - - "wave_direction": float representing the wave direction in degrees of the 2D spectra parameter, or None + - "wave_direction": float representing the wave direction of the 2D spectra parameter, or None - "wave_direction_index": int representing the 0-based index of the wave direction bin, or None - - "wave_direction_bounds": 2-tuple of floats representing the wave direction bounds in degrees, or None + - "wave_direction_bounds": 2-tuple of floats representing the wave direction bounds, or None - "wave_direction_units": :class:`Units` object representing the wave direction units (e.g. degrees), or None - - "wave_frequency": float representing the wave frequency in Hz of the 2D spectra parameter, or None + - "wave_frequency": float representing the wave frequency of the 2D spectra parameter, or None - "wave_frequency_index": int representing the 0-based index of the wave frequency bin, or None - - "wave_frequency_bounds": 2-tuple of floats representing the wave frequency bounds in Hz, or None + - "wave_frequency_bounds": 2-tuple of floats representing the wave frequency bounds, or None - "wave_frequency_units": :class:`Units` object representing the wave frequency units (e.g. 1/s), or None - "param": alias of "variable" @@ -148,14 +147,28 @@ def long_name(self) -> Optional[str]: @mark_get_key @abstractmethod - def wavelength(self) -> Optional[int]: - """Return the optical parameter wavelength in nanometers.""" + def wavelength(self, units=None) -> Optional[Union[int, float]]: + """Return the optical parameter wavelength. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the wavelength to the specified units and return as float. + If None, return the value in native units. 
+ """ pass @mark_get_key @abstractmethod - def wavelength_bounds(self) -> Optional[tuple[int, int]]: - """Return the optical parameter wavelength bounds in nanometers.""" + def wavelength_bounds(self, units=None) -> Optional[Union[tuple[int, int], tuple[float, float]]]: + """Return the optical parameter wavelength bounds. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ pass @mark_get_key @@ -166,8 +179,15 @@ def wavelength_units(self) -> Optional["Units"]: @mark_get_key @abstractmethod - def wave_direction(self) -> Optional[float]: - """Return the wave direction in degrees of the 2D spectra parameter.""" + def wave_direction(self, units=None) -> Optional[float]: + """Return the wave direction of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the direction to the specified units and return as float. + If None, return the value in native units. + """ pass @mark_get_key @@ -178,8 +198,15 @@ def wave_direction_index(self) -> Optional[int]: @mark_get_key @abstractmethod - def wave_direction_bounds(self) -> Optional[tuple[float, float]]: - """Return the wave direction bounds in degrees of the 2D spectra parameter.""" + def wave_direction_bounds(self, units=None) -> Optional[tuple[float, float]]: + """Return the wave direction bounds of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. 
+ """ pass @mark_get_key @@ -190,8 +217,15 @@ def wave_direction_units(self) -> Optional["Units"]: @mark_get_key @abstractmethod - def wave_frequency(self) -> Optional[float]: - """Return the wave frequency in Hz of the 2D spectra parameter.""" + def wave_frequency(self, units=None) -> Optional[float]: + """Return the wave frequency of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the frequency to the specified units and return as float. + If None, return the value in native units. + """ pass @mark_get_key @@ -202,8 +236,15 @@ def wave_frequency_index(self) -> Optional[int]: @mark_get_key @abstractmethod - def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: - """Return the wave frequency bounds in Hz of the 2D spectra parameter.""" + def wave_frequency_bounds(self, units=None) -> Optional[tuple[float, float]]: + """Return the wave frequency bounds of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ pass @mark_get_key @@ -231,16 +272,16 @@ def from_dict(cls, d: dict) -> "ParameterBase": - "units": The parameter units, as a string or a Units object. - "chem": The chemical constituent or aerosol type of the parameter. - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - - "wavelength": The optical parameter wavelength in nanometers as an int. - - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. + - "wavelength": The optical parameter wavelength, as an int or a float. + - "wavelength_bounds": The optical parameter wavelength bounds, as a 2-tuple of ints or floats. - "wavelength_units": The wavelength units, as a string or a Units object. - - "wave_direction": The wave direction in degrees of the 2D spectra parameter. 
+ - "wave_direction": The wave direction of the 2D spectra parameter, as a float. - "wave_direction_index": The 0-based index of the wave direction bin. - - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. + - "wave_direction_bounds": The wave direction bounds, as a 2-tuple of floats. - "wave_direction_units": The wave direction units, as a string or a Units object. - - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. + - "wave_frequency": The wave frequency of the 2D spectra parameter, as a float. - "wave_frequency_index": The 0-based index of the wave frequency bin. - - "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. + - "wave_frequency_bounds": The wave frequency bounds, as a 2-tuple of floats. - "wave_frequency_units": The wave frequency units, as a string or a Units object. Returns @@ -365,15 +406,15 @@ def chem_long_name(self) -> None: """ return None - def wavelength(self) -> None: - r"""Return the optical parameter wavelength in nanometers. + def wavelength(self, units=None) -> None: + r"""Return the optical parameter wavelength. An EmptyParameter does not contain any parameter information, and this method returns None. """ return None - def wavelength_bounds(self) -> None: - r"""Return the optical parameter wavelength bounds in nanometers. + def wavelength_bounds(self, units=None) -> None: + r"""Return the optical parameter wavelength bounds. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -386,8 +427,8 @@ def wavelength_units(self) -> None: """ return None - def wave_direction(self) -> None: - r"""Return the wave direction in degrees of the 2D spectra parameter. + def wave_direction(self, units=None) -> None: + r"""Return the wave direction of the 2D spectra parameter. An EmptyParameter does not contain any parameter information, and this method returns None. 
""" @@ -400,8 +441,8 @@ def wave_direction_index(self) -> None: """ return None - def wave_direction_bounds(self) -> None: - r"""Return the wave direction bounds in degrees of the 2D spectra parameter. + def wave_direction_bounds(self, units=None) -> None: + r"""Return the wave direction bounds of the 2D spectra parameter. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -414,8 +455,8 @@ def wave_direction_units(self) -> None: """ return None - def wave_frequency(self) -> None: - r"""Return the wave frequency in Hz of the 2D spectra parameter. + def wave_frequency(self, units=None) -> None: + r"""Return the wave frequency of the 2D spectra parameter. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -428,8 +469,8 @@ def wave_frequency_index(self) -> None: """ return None - def wave_frequency_bounds(self) -> None: - r"""Return the wave frequency bounds in Hz of the 2D spectra parameter. + def wave_frequency_bounds(self, units=None) -> None: + r"""Return the wave frequency bounds of the 2D spectra parameter. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -526,15 +567,15 @@ def chem_long_name(self) -> None: """ return None - def wavelength(self) -> None: - r"""Return the optical parameter wavelength in nanometers. + def wavelength(self, units=None) -> None: + r"""Return the optical parameter wavelength. A regular Parameter does not have optical information, and this method returns None. """ return None - def wavelength_bounds(self) -> None: - r"""Return the optical parameter wavelength bounds in nanometers. + def wavelength_bounds(self, units=None) -> None: + r"""Return the optical parameter wavelength bounds. A regular Parameter does not have optical information, and this method returns None. 
""" @@ -547,8 +588,8 @@ def wavelength_units(self) -> None: """ return None - def wave_direction(self) -> None: - r"""Return the wave direction in degrees of the 2D spectra parameter. + def wave_direction(self, units=None) -> None: + r"""Return the wave direction of the 2D spectra parameter. A regular Parameter does not have wave spectra information, and this method returns None. """ @@ -561,8 +602,8 @@ def wave_direction_index(self) -> None: """ return None - def wave_direction_bounds(self) -> None: - r"""Return the wave direction bounds in degrees of the 2D spectra parameter. + def wave_direction_bounds(self, units=None) -> None: + r"""Return the wave direction bounds of the 2D spectra parameter. A regular Parameter does not have wave spectra information, and this method returns None. """ @@ -575,8 +616,8 @@ def wave_direction_units(self) -> None: """ return None - def wave_frequency(self) -> None: - r"""Return the wave frequency in Hz of the 2D spectra parameter. + def wave_frequency(self, units=None) -> None: + r"""Return the wave frequency of the 2D spectra parameter. A regular Parameter does not have wave spectra information, and this method returns None. """ @@ -589,8 +630,8 @@ def wave_frequency_index(self) -> None: """ return None - def wave_frequency_bounds(self) -> None: - r"""Return the wave frequency bounds in Hz of the 2D spectra parameter. + def wave_frequency_bounds(self, units=None) -> None: + r"""Return the wave frequency bounds of the 2D spectra parameter. A regular Parameter does not have wave spectra information, and this method returns None. """ @@ -772,13 +813,36 @@ def __init__( self._wavelength_bounds = wavelength_bounds self._wavelength_units = Units.from_any(wavelength_units) - def wavelength(self) -> Optional[int]: - r"""Return the optical parameter wavelength in nanometers.""" - return self._wavelength + def wavelength(self, units=None) -> Optional[Union[int, float]]: + r"""Return the optical parameter wavelength. 
- def wavelength_bounds(self) -> Optional[tuple[int, int]]: - r"""Return the optical parameter wavelength bounds in nanometers.""" - return self._wavelength_bounds + Parameters + ---------- + units : str or Units, optional + If provided, convert the wavelength to the specified units and return as float. + If None, return the value in native units. + """ + if self._wavelength is None: + return None + if units is None: + return self._wavelength + return convert_array(self._wavelength, units, self._wavelength_units) + + def wavelength_bounds(self, units=None) -> Optional[Union[tuple[int, int], tuple[float, float]]]: + r"""Return the optical parameter wavelength bounds. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units (tuple of ints in nanometers). + """ + if self._wavelength_bounds is None: + return None + if units is None: + return self._wavelength_bounds + a, b = convert_array(self._wavelength_bounds, units, self._wavelength_units) + return float(a), float(b) def wavelength_units(self) -> Optional["Units"]: r"""Return the units of the optical parameter wavelength.""" @@ -911,33 +975,79 @@ def __init__( self._wave_frequency_bounds = wave_frequency_bounds self._wave_frequency_units = Units.from_any(wave_frequency_units) - def wave_direction(self) -> Optional[float]: - r"""Return the wave direction in degrees of the 2D spectra parameter.""" - return self._wave_direction + def wave_direction(self, units=None) -> Optional[float]: + r"""Return the wave direction of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the direction to the specified units and return as float. + If None, return the value in native units. 
+ """ + if self._wave_direction is None: + return None + if units is None: + return self._wave_direction + return convert_array(self._wave_direction, units, self._wave_direction_units) def wave_direction_index(self) -> Optional[int]: r"""Return the 0-based index of the wave direction bin.""" return self._wave_direction_index - def wave_direction_bounds(self) -> Optional[tuple[float, float]]: - r"""Return the wave direction bounds in degrees of the 2D spectra parameter.""" - return self._wave_direction_bounds + def wave_direction_bounds(self, units=None) -> Optional[tuple[float, float]]: + r"""Return the wave direction bounds of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ + if self._wave_direction_bounds is None: + return None + if units is None: + return self._wave_direction_bounds + a, b = convert_array(self._wave_direction_bounds, units, self._wave_direction_units) + return float(a), float(b) def wave_direction_units(self) -> Optional["Units"]: r"""Return the units of the wave direction.""" return self._wave_direction_units - def wave_frequency(self) -> Optional[float]: - r"""Return the wave frequency in Hz of the 2D spectra parameter.""" - return self._wave_frequency + def wave_frequency(self, units=None) -> Optional[float]: + r"""Return the wave frequency of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the frequency to the specified units and return as float. + If None, return the value in native units. 
+ """ + if self._wave_frequency is None: + return None + if units is None: + return self._wave_frequency + return convert_array(self._wave_frequency, units, self._wave_frequency_units) def wave_frequency_index(self) -> Optional[int]: r"""Return the 0-based index of the wave frequency bin.""" return self._wave_frequency_index - def wave_frequency_bounds(self) -> Optional[tuple[float, float]]: - r"""Return the wave frequency bounds in Hz of the 2D spectra parameter.""" - return self._wave_frequency_bounds + def wave_frequency_bounds(self, units=None) -> Optional[tuple[float, float]]: + r"""Return the wave frequency bounds of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ + if self._wave_frequency_bounds is None: + return None + if units is None: + return self._wave_frequency_bounds + a, b = convert_array(self._wave_frequency_bounds, units, self._wave_frequency_units) + return float(a), float(b) def wave_frequency_units(self) -> Optional["Units"]: r"""Return the units of the wave frequency.""" diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index 977b56156..cc8f153be 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -528,3 +528,127 @@ def test_parameter_component_to_dict_chemical_optical(): assert d["wavelength"] == 550 assert d["wavelength_bounds"] == (400, 700) assert d["wavelength_units"] == "nanometer" + + +def test_parameter_component_wavelength_unit_conversion(): + """Test wavelength conversion to different units.""" + import math + + p = OpticalParameter(variable="aod", wavelength=550, wavelength_bounds=(400, 700), wavelength_units="nm") + + # Native units: int + assert p.wavelength() == 550 + assert isinstance(p.wavelength(), int) + assert p.wavelength_bounds() == (400, 700) + assert 
isinstance(p.wavelength_bounds()[0], int) + + # Convert to micrometers + wl = p.wavelength(units="um") + assert isinstance(wl, float) + assert math.isclose(wl, 0.55, rel_tol=1e-9) + + wb = p.wavelength_bounds(units="um") + assert isinstance(wb[0], float) + assert isinstance(wb[1], float) + assert math.isclose(wb[0], 0.4, rel_tol=1e-9) + assert math.isclose(wb[1], 0.7, rel_tol=1e-9) + + # Convert to meters + wl_m = p.wavelength(units="m") + assert math.isclose(wl_m, 550e-9, rel_tol=1e-9) + + # None wavelength stays None + p2 = OpticalParameter(variable="aod", wavelength_units="nm") + assert p2.wavelength(units="um") is None + assert p2.wavelength_bounds(units="um") is None + + +def test_parameter_component_wave_direction_unit_conversion(): + """Test wave direction conversion to different units.""" + import math + + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=180.0, + wave_direction_bounds=(170.0, 190.0), + wave_direction_units="degrees", + ) + + # Native units: float + assert p.wave_direction() == 180.0 + assert isinstance(p.wave_direction(), float) + assert p.wave_direction_bounds() == (170.0, 190.0) + + # Convert to radians + wd = p.wave_direction(units="radian") + assert isinstance(wd, float) + assert math.isclose(wd, math.pi, rel_tol=1e-6) + + wdb = p.wave_direction_bounds(units="radian") + assert isinstance(wdb[0], float) + assert math.isclose(wdb[0], math.radians(170.0), rel_tol=1e-6) + assert math.isclose(wdb[1], math.radians(190.0), rel_tol=1e-6) + + # None stays None + p2 = WaveSpectraParameter(variable="2dfd", wave_direction_units="degrees") + assert p2.wave_direction(units="radian") is None + assert p2.wave_direction_bounds(units="radian") is None + + +def test_parameter_component_wave_frequency_unit_conversion(): + """Test wave frequency conversion to different units.""" + import math + + p = WaveSpectraParameter( + variable="2dfd", + wave_frequency=0.05, + wave_frequency_bounds=(0.04, 0.06), + wave_frequency_units="s-1", + ) + + # 
Native units: float + assert p.wave_frequency() == 0.05 + assert isinstance(p.wave_frequency(), float) + assert p.wave_frequency_bounds() == (0.04, 0.06) + + # Convert to Hz (same dimensionality, should be identity) + wf = p.wave_frequency(units="Hz") + assert isinstance(wf, float) + assert math.isclose(wf, 0.05, rel_tol=1e-9) + + # Convert bounds + wfb = p.wave_frequency_bounds(units="Hz") + assert isinstance(wfb[0], float) + assert math.isclose(wfb[0], 0.04, rel_tol=1e-9) + assert math.isclose(wfb[1], 0.06, rel_tol=1e-9) + + # None stays None + p2 = WaveSpectraParameter(variable="2dfd", wave_frequency_units="s-1") + assert p2.wave_frequency(units="Hz") is None + assert p2.wave_frequency_bounds(units="Hz") is None + + +def test_parameter_component_chemical_optical_wavelength_conversion(): + """Test wavelength conversion works through ChemicalOpticalParameter.""" + import math + + p = ChemicalOpticalParameter( + variable="aod", + chem="aer_total", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + + # Native: int + assert p.wavelength() == 550 + assert isinstance(p.wavelength(), int) + + # Converted: float + wl = p.wavelength(units="um") + assert isinstance(wl, float) + assert math.isclose(wl, 0.55, rel_tol=1e-9) + + wb = p.wavelength_bounds(units="um") + assert math.isclose(wb[0], 0.4, rel_tol=1e-9) + assert math.isclose(wb[1], 0.7, rel_tol=1e-9) diff --git a/tests/grib/test_grib_parameter.py b/tests/grib/test_grib_parameter.py index 068058f37..f752ff0c4 100644 --- a/tests/grib/test_grib_parameter.py +++ b/tests/grib/test_grib_parameter.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. 
# +import math import pytest from grib_fixtures import ( @@ -87,8 +88,16 @@ def test_grib_parameter_chem_long_name(fl_type): assert f.parameter.chem() == chem assert f.parameter.chem_long_name() == chem_long assert f.parameter.wavelength() is None + assert f.parameter.wavelength_bounds() is None + assert f.parameter.wavelength_units() is None assert f.parameter.wave_direction() is None + assert f.parameter.wave_direction_index() is None + assert f.parameter.wave_direction_bounds() is None + assert f.parameter.wave_direction_units() is None assert f.parameter.wave_frequency() is None + assert f.parameter.wave_frequency_index() is None + assert f.parameter.wave_frequency_bounds() is None + assert f.parameter.wave_frequency_units() is None @pytest.mark.parametrize("fl_type", FL_FILE) @@ -101,11 +110,32 @@ def test_grib_parameter_wavelength(fl_type): for f in ds: assert f.parameter.wavelength() in (550, 800) assert isinstance(f.parameter.wavelength(), int) + assert f.parameter.wavelength_units() == "nm" result = ds.unique("parameter.wavelength") assert set(result["parameter.wavelength"]) == {550, 800} +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wavelength_conversion(fl_type): + """Test wavelength unit conversion from CAMS optical GRIB2 data.""" + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + + for f in ds: + wl_nm = f.parameter.wavelength() + wl_um = f.parameter.wavelength(units="um") + assert isinstance(wl_um, float) + assert math.isclose(wl_um, wl_nm / 1000.0, rel_tol=1e-9) + + # wavelength_bounds may be None (single wavelength) + wb = f.parameter.wavelength_bounds() + if wb is not None: + wb_um = f.parameter.wavelength_bounds(units="um") + assert isinstance(wb_um[0], float) + assert math.isclose(wb_um[0], wb[0] / 1000.0, rel_tol=1e-9) + assert math.isclose(wb_um[1], wb[1] / 1000.0, rel_tol=1e-9) + + @pytest.mark.parametrize("fl_type", FL_FILE) def test_grib_parameter_wave_direction(fl_type): """Test wave_direction 
extraction from 2D wave spectra GRIB data.""" @@ -116,11 +146,51 @@ def test_grib_parameter_wave_direction(fl_type): for f in ds: assert f.parameter.wave_direction() is not None assert isinstance(f.parameter.wave_direction(), float) + assert f.parameter.wave_direction_units() == "degrees" + assert f.parameter.wave_direction_index() is not None + assert isinstance(f.parameter.wave_direction_index(), int) + assert f.parameter.wave_direction_index() >= 0 result = ds.unique("parameter.wave_direction") assert set(result["parameter.wave_direction"]) == {55.0, 115.0, 175.0, 235.0, 295.0, 355.0} +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction_bounds(fl_type): + """Test wave_direction_bounds extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + for f in ds: + bounds = f.parameter.wave_direction_bounds() + if bounds is not None: + assert isinstance(bounds, tuple) + assert len(bounds) == 2 + assert isinstance(bounds[0], float) + assert isinstance(bounds[1], float) + # bounds should bracket the direction value + direction = f.parameter.wave_direction() + assert bounds[0] <= direction <= bounds[1] + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction_conversion(fl_type): + """Test wave direction unit conversion from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + f = ds[0] + wd_deg = f.parameter.wave_direction() + wd_rad = f.parameter.wave_direction(units="radian") + assert isinstance(wd_rad, float) + assert math.isclose(wd_rad, math.radians(wd_deg), rel_tol=1e-6) + + bounds = f.parameter.wave_direction_bounds() + if bounds is not None: + bounds_rad = f.parameter.wave_direction_bounds(units="radian") + assert isinstance(bounds_rad[0], float) + assert math.isclose(bounds_rad[0], math.radians(bounds[0]), rel_tol=1e-6) + assert math.isclose(bounds_rad[1], math.radians(bounds[1]), 
rel_tol=1e-6) + + @pytest.mark.parametrize("fl_type", FL_FILE) def test_grib_parameter_wave_frequency(fl_type): """Test wave_frequency extraction from 2D wave spectra GRIB data.""" @@ -133,3 +203,47 @@ def test_grib_parameter_wave_frequency(fl_type): assert abs(freqs[0] - 0.034523) < 0.001 assert abs(freqs[1] - 0.1311) < 0.001 assert abs(freqs[2] - 0.497852) < 0.001 + + # Check units and index + for f in ds: + assert f.parameter.wave_frequency_units() == "s-1" + assert f.parameter.wave_frequency_index() is not None + assert isinstance(f.parameter.wave_frequency_index(), int) + assert f.parameter.wave_frequency_index() >= 0 + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_frequency_bounds(fl_type): + """Test wave_frequency_bounds extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + for f in ds: + bounds = f.parameter.wave_frequency_bounds() + if bounds is not None: + assert isinstance(bounds, tuple) + assert len(bounds) == 2 + assert isinstance(bounds[0], float) + assert isinstance(bounds[1], float) + # bounds should bracket the frequency value + freq = f.parameter.wave_frequency() + assert bounds[0] <= freq <= bounds[1] + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_frequency_conversion(fl_type): + """Test wave frequency unit conversion from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + f = ds[0] + wf = f.parameter.wave_frequency() + # s-1 and Hz are the same unit, so conversion should be identity + wf_hz = f.parameter.wave_frequency(units="Hz") + assert isinstance(wf_hz, float) + assert math.isclose(wf_hz, wf, rel_tol=1e-9) + + bounds = f.parameter.wave_frequency_bounds() + if bounds is not None: + bounds_hz = f.parameter.wave_frequency_bounds(units="Hz") + assert isinstance(bounds_hz[0], float) + assert math.isclose(bounds_hz[0], bounds[0], rel_tol=1e-9) + assert 
math.isclose(bounds_hz[1], bounds[1], rel_tol=1e-9) From 3a78760fb184a406047d452f36b7da2b3daca9ba Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 10:19:47 +0200 Subject: [PATCH 17/21] docstrings fixed --- .../data/field/component/parameter.py | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index c42021fcd..7085629b9 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -680,16 +680,16 @@ def set(self, *args, **kwargs): - "long_name": The long name of the parameter variable. - "chem": The chemical constituent or aerosol type of the parameter. - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. - - "wavelength": The optical parameter wavelength in nanometers. - - "wavelength_bounds": The optical parameter wavelength bounds in nanometers, as a 2-tuple of ints. + - "wavelength": The optical parameter wavelength. + - "wavelength_bounds": The optical parameter wavelength bounds, as a 2-tuple. - "wavelength_units": The wavelength units, as a string or a Units object. - - "wave_direction": The wave direction in degrees of the 2D spectra parameter. + - "wave_direction": The wave direction of the 2D spectra parameter. - "wave_direction_index": The 0-based index of the wave direction bin. - - "wave_direction_bounds": The wave direction bounds in degrees, as a 2-tuple of floats. + - "wave_direction_bounds": The wave direction bounds, as a 2-tuple of floats. - "wave_direction_units": The wave direction units, as a string or a Units object. - - "wave_frequency": The wave frequency in Hz of the 2D spectra parameter. + - "wave_frequency": The wave frequency of the 2D spectra parameter. - "wave_frequency_index": The 0-based index of the wave frequency bin. 
- - "wave_frequency_bounds": The wave frequency bounds in Hz, as a 2-tuple of floats. + - "wave_frequency_bounds": The wave frequency bounds, as a 2-tuple of floats. - "wave_frequency_units": The wave frequency units, as a string or a Units object. """ d = self._normalise_set_kwargs( @@ -791,9 +791,9 @@ class OpticalParameter(Parameter): units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. wavelength : int, optional - The optical parameter wavelength in nanometers, by default None. + The optical parameter wavelength in native units (see ``wavelength_units``), by default None. wavelength_bounds : 2-tuple of ints, optional - The optical parameter wavelength bounds in nanometers, by default None. + The optical parameter wavelength bounds in native units, by default None. wavelength_units : str or Units, optional The wavelength units, by default None. Can be provided as a string or a Units object. """ @@ -835,7 +835,7 @@ def wavelength_bounds(self, units=None) -> Optional[Union[tuple[int, int], tuple ---------- units : str or Units, optional If provided, convert the bounds to the specified units and return as tuple of floats. - If None, return the value in native units (tuple of ints in nanometers). + If None, return the value in native units. """ if self._wavelength_bounds is None: return None @@ -879,9 +879,9 @@ class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter): chem_long_name : str, optional The long name of the parameter chemical constituent or aerosol type, by default None. wavelength : int, optional - The optical parameter wavelength in nanometers, by default None. + The optical parameter wavelength in native units (see ``wavelength_units``), by default None. wavelength_bounds : 2-tuple of ints, optional - The optical parameter wavelength bounds in nanometers, by default None. + The optical parameter wavelength bounds in native units, by default None. 
wavelength_units : str or Units, optional The wavelength units, by default None. Can be provided as a string or a Units object. """ @@ -933,19 +933,21 @@ class WaveSpectraParameter(Parameter): units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. wave_direction : float, optional - The wave direction in degrees of the 2D spectra parameter, by default None. + The wave direction of the 2D spectra parameter in native units (see ``wave_direction_units``), + by default None. wave_direction_index : int, optional The 0-based index of the wave direction bin, by default None. wave_direction_bounds : 2-tuple of floats, optional - The wave direction bounds in degrees of the 2D spectra parameter, by default None. + The wave direction bounds in native units, by default None. wave_direction_units : str or Units, optional The wave direction units, by default None. Can be provided as a string or a Units object. wave_frequency : float, optional - The wave frequency in Hz of the 2D spectra parameter, by default None. + The wave frequency of the 2D spectra parameter in native units (see ``wave_frequency_units``), + by default None. wave_frequency_index : int, optional The 0-based index of the wave frequency bin, by default None. wave_frequency_bounds : 2-tuple of floats, optional - The wave frequency bounds in Hz of the 2D spectra parameter, by default None. + The wave frequency bounds in native units, by default None. wave_frequency_units : str or Units, optional The wave frequency units, by default None. Can be provided as a string or a Units object.
""" From d3d1ffc3265f42ef81d6da3c4509b6f5120c0695 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 10:20:30 +0200 Subject: [PATCH 18/21] _normalise_create_kwargs fixed when called with `allowed_keys=None` --- src/earthkit/data/field/component/component.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/earthkit/data/field/component/component.py b/src/earthkit/data/field/component/component.py index 9048c582c..a76da5593 100644 --- a/src/earthkit/data/field/component/component.py +++ b/src/earthkit/data/field/component/component.py @@ -288,12 +288,12 @@ def _normalise_create_kwargs(cls, data, allowed_keys=None, remove_nones=False): _kwargs = {} for k_in, v in data.items(): k = cls._ALIASES.get(k_in, k_in) - if k in allowed_keys: - if remove_nones and v is None: - continue - _kwargs[k] = v - else: - raise ValueError(f"Cannot use key={k} to create object={cls}") + # allowed_keys=None means no restriction: accept every key + if allowed_keys is not None and k not in allowed_keys: + raise ValueError(f"Cannot use key={k} to create object={cls}") + if remove_nones and v is None: + continue + _kwargs[k] = v return _kwargs @classmethod From 0da557d23aaa4792aa8d1975763caa6dfbfb1b2d Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 15:28:10 +0200 Subject: [PATCH 19/21] Mapping the following parameter metadata keys to GRIB: variable, chem, wavelength, wavelength_bounds, wave_direction_index, wave_frequency_index Tests added --- src/earthkit/data/field/grib/parameter.py | 29 +- .../test_grib_parameter_context_collector.py | 314 ++++++++++++++++++ tests/grib/test_grib_set_parameter.py | 153 +++++++++ 3 files changed, 495 insertions(+), 1 deletion(-) create mode 100644 tests/grib/test_grib_parameter_context_collector.py diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index e69d5a633..ec19aee14 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -72,6 +72,8 @@ def
_get(key, default=None): d["chem"] = chem d["chem_long_name"] = chem_long_name + # TODO: some of the logic below should be moved to ecCodes + # Metadata for optical parameters _wavelength = _get("mars.wavelength", None) wavelength = None @@ -183,8 +185,33 @@ def collect_keys(handler, context): component = handler.component r = { "shortName": component.variable(), - # "units": param.units, } + + chem = component.chem() + if chem: + r["chemShortName"] = chem + + # TODO: some of the logic below should be moved to ecCodes + wavelength_bounds = component.wavelength_bounds(units="m") + if wavelength_bounds is not None: + r["firstWavelength"] = wavelength_bounds[0] + r["secondWavelength"] = wavelength_bounds[1] + # see: https://codes.ecmwf.int/grib/format/grib2/ctables/4/91/ + r["typeOfWavelengthInterval"] = 2 # Between first and second limit. + # The range includes the first limit but not the second limit + else: + wavelength = component.wavelength(units="m") + if wavelength is not None: + r["firstWavelength"] = wavelength + + wave_direction_index = component.wave_direction_index() + if wave_direction_index is not None: + r["directionNumber"] = wave_direction_index + 1 # convert to 1-based index + + wave_frequency_index = component.wave_frequency_index() + if wave_frequency_index is not None: + r["frequencyNumber"] = wave_frequency_index + 1 # convert to 1-based index + context.update(r) diff --git a/tests/grib/test_grib_parameter_context_collector.py b/tests/grib/test_grib_parameter_context_collector.py new file mode 100644 index 000000000..7cebb66b9 --- /dev/null +++ b/tests/grib/test_grib_parameter_context_collector.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +"""Tests for GribParameterContextCollector.collect_keys covering +chem, wavelength, wave_direction_index, and wave_frequency_index context keys. +""" + +import pytest +from grib_fixtures import FL_FILE, load_grib_data # noqa: E402 + +from earthkit.data.field.component.parameter import ( + ChemicalOpticalParameter, + ChemicalParameter, + OpticalParameter, + Parameter, + WaveSpectraParameter, +) +from earthkit.data.field.grib.parameter import GribParameterContextCollector +from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler + + +def _make_handler(component): + """Helper to wrap a parameter component in a handler.""" + return ParameterFieldComponentHandler(component) + + +def _collect(handler): + """Run collect_keys and return the resulting context dict.""" + context = {} + GribParameterContextCollector.collect_keys(handler, context) + return context + + +# -------------------------------------------------------------------------- +# Unit tests using constructed components (no file I/O) +# -------------------------------------------------------------------------- + + +class TestCollectKeysBasicParameter: + """Test collect_keys with a basic Parameter (no chem, wavelength, or wave).""" + + def test_shortname_only(self): + comp = Parameter(variable="t") + ctx = _collect(_make_handler(comp)) + assert ctx == {"shortName": "t"} + + def test_no_chem_key(self): + comp = Parameter(variable="msl", units="Pa") + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + def test_no_wavelength_keys(self): + comp = Parameter(variable="msl") + ctx = _collect(_make_handler(comp)) + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + + def test_no_direction_frequency_keys(self): + comp = 
Parameter(variable="msl") + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +class TestCollectKeysChemParameter: + """Test collect_keys with ChemicalParameter.""" + + def test_chem_short_name_set(self): + comp = ChemicalParameter(variable="tcvimd", chem="CO") + ctx = _collect(_make_handler(comp)) + assert ctx["shortName"] == "tcvimd" + assert ctx["chemShortName"] == "CO" + + def test_chem_with_long_name(self): + comp = ChemicalParameter(variable="mass_mixrat", chem="O3", chem_long_name="Ozone") + ctx = _collect(_make_handler(comp)) + assert ctx["chemShortName"] == "O3" + # chem_long_name is not collected into context + assert "chemLongName" not in ctx + + def test_chem_none_not_set(self): + """When chem is None, chemShortName should not appear in context.""" + comp = ChemicalParameter(variable="foo", chem=None) + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + def test_chem_empty_string_not_set(self): + """When chem is empty string (falsy), chemShortName should not appear.""" + comp = ChemicalParameter(variable="foo", chem="") + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + +class TestCollectKeysOpticalParameter: + """Test collect_keys with OpticalParameter.""" + + def test_wavelength_no_bounds(self): + """Single wavelength (no bounds) → only firstWavelength set, converted to metres.""" + # wavelength=550 nm → 550e-9 m + comp = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + ctx = _collect(_make_handler(comp)) + assert "firstWavelength" in ctx + assert "secondWavelength" not in ctx + # 550 nm = 5.5e-7 m + assert abs(ctx["firstWavelength"] - 5.5e-7) < 1e-12 + + def test_wavelength_with_bounds(self): + """When wavelength_bounds are present, firstWavelength and secondWavelength set.""" + comp = OpticalParameter( + variable="aod", + wavelength=625, + wavelength_bounds=(500, 750), + wavelength_units="nm", + ) + ctx = 
_collect(_make_handler(comp)) + assert "firstWavelength" in ctx + assert "secondWavelength" in ctx + # 500 nm = 5e-7 m + assert abs(ctx["firstWavelength"] - 5e-7) < 1e-12 + # 750 nm = 7.5e-7 m + assert abs(ctx["secondWavelength"] - 7.5e-7) < 1e-12 + + def test_no_direction_frequency_for_optical(self): + """Optical parameters should not set direction/frequency keys.""" + comp = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +class TestCollectKeysChemOpticalParameter: + """Test collect_keys with ChemicalOpticalParameter.""" + + def test_both_chem_and_wavelength(self): + comp = ChemicalOpticalParameter( + variable="aod", + chem="SO4", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["shortName"] == "aod" + assert ctx["chemShortName"] == "SO4" + assert "firstWavelength" in ctx + assert "secondWavelength" in ctx + assert abs(ctx["firstWavelength"] - 4e-7) < 1e-12 + assert abs(ctx["secondWavelength"] - 7e-7) < 1e-12 + + def test_chem_optical_no_bounds(self): + comp = ChemicalOpticalParameter( + variable="aod", + chem="dust", + wavelength=800, + wavelength_units="nm", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["chemShortName"] == "dust" + assert "firstWavelength" in ctx + assert "secondWavelength" not in ctx + assert abs(ctx["firstWavelength"] - 8e-7) < 1e-12 + + +class TestCollectKeysWaveSpectraParameter: + """Test collect_keys with WaveSpectraParameter.""" + + def test_direction_index_converted_to_1_based(self): + """wave_direction_index is 0-based, directionNumber should be 1-based.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=55.0, + wave_direction_index=0, + wave_direction_units="degree", + wave_frequency=0.035, + wave_frequency_index=0, + wave_frequency_units="s ** -1", + ) + ctx = 
_collect(_make_handler(comp)) + assert ctx["directionNumber"] == 1 + + def test_frequency_index_converted_to_1_based(self): + """wave_frequency_index is 0-based, frequencyNumber should be 1-based.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=115.0, + wave_direction_index=2, + wave_direction_units="degree", + wave_frequency=0.131, + wave_frequency_index=5, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["directionNumber"] == 3 + assert ctx["frequencyNumber"] == 6 + + def test_direction_only(self): + """If only direction index is set, only directionNumber appears.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=175.0, + wave_direction_index=4, + wave_direction_units="degree", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["directionNumber"] == 5 + assert "frequencyNumber" not in ctx + + def test_frequency_only(self): + """If only frequency index is set, only frequencyNumber appears.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_frequency=0.5, + wave_frequency_index=9, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert ctx["frequencyNumber"] == 10 + + def test_no_chem_or_wavelength_for_wave(self): + """Wave spectra params should not set chem/wavelength keys.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=55.0, + wave_direction_index=0, + wave_direction_units="degree", + wave_frequency=0.035, + wave_frequency_index=0, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + + +# -------------------------------------------------------------------------- +# Integration tests using real GRIB data files +# -------------------------------------------------------------------------- + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class 
TestCollectKeysFromChemGrib: + """Integration: context collection from chem GRIB files.""" + + def test_chem_context(self, fl_type): + ds, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + expected_chems = ["CO", "HCHO", "O3"] + for i, chem in enumerate(expected_chems): + f = ds[i] + handler = f._components["parameter"] + ctx = _collect(handler) + assert ctx["shortName"] == f.parameter.variable() + assert ctx["chemShortName"] == chem + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class TestCollectKeysFromOpticalGrib: + """Integration: context collection from optical GRIB files.""" + + def test_optical_context_wavelength(self, fl_type): + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + assert "firstWavelength" in ctx + # wavelength in metres + wl_m = f.parameter.wavelength(units="m") + assert abs(ctx["firstWavelength"] - wl_m) < 1e-15 + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class TestCollectKeysFromWaveSpectraGrib: + """Integration: context collection from wave spectra GRIB files.""" + + def test_wave_direction_number(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + # directionNumber should be 1-based + expected = f.parameter.wave_direction_index() + 1 + assert ctx["directionNumber"] == expected + + def test_wave_frequency_number(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + # frequencyNumber should be 1-based + expected = f.parameter.wave_frequency_index() + 1 + assert 
ctx["frequencyNumber"] == expected + + def test_wave_no_chem_or_wavelength(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds[0] + handler = f._components["parameter"] + ctx = _collect(handler) + assert "chemShortName" not in ctx + assert "secondWavelength" not in ctx diff --git a/tests/grib/test_grib_set_parameter.py b/tests/grib/test_grib_set_parameter.py index 3ef96daad..1d7f562b7 100644 --- a/tests/grib/test_grib_set_parameter.py +++ b/tests/grib/test_grib_set_parameter.py @@ -83,3 +83,156 @@ def test_grib_set_parameter_2( assert f.get("metadata.shortName") is None assert f.get("parameter.units") == "kg/kg" assert f.get("metadata.units") is None + + +# -------------------------------------------------------------------------- +# Round-trip tests for GribParameterContextCollector keys +# -------------------------------------------------------------------------- + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_chem_roundtrip(fl_type): + """Set parameter.chem, write GRIB, read back and verify chemShortName.""" + ds_ori, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Change chem from CO to SO2 + f2 = f.set({"parameter.chem": "SO2"}) + assert f2.get("parameter.chem") == "SO2" + assert f2.get("parameter.variable") == "mass_mixrat" + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.variable") == "mass_mixrat" + assert f_saved[0].get("parameter.chem") == "SO2" + assert f_saved[0].get("metadata.shortName") == "mass_mixrat" + assert f_saved[0].get("metadata.chemShortName") == "SO2" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_chem_variable_roundtrip(fl_type): + """Set both parameter.variable and parameter.chem, write GRIB, read back.""" + ds_ori, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + f 
= ds_ori[0] + + f2 = f.set({"parameter.variable": "mass_mixrat", "parameter.chem": "HCHO"}) + assert f2.get("parameter.chem") == "HCHO" + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.variable") == "mass_mixrat" + assert f_saved[0].get("parameter.chem") == "HCHO" + assert f_saved[0].get("metadata.chemShortName") == "HCHO" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wavelength_roundtrip(fl_type): + """Set parameter.wavelength (single value), write GRIB, read back.""" + ds_ori, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Change wavelength from 550 to 800 nm + f2 = f.set({"parameter.wavelength": 800, "parameter.wavelength_units": "nm"}) + assert f2.get("parameter.wavelength") == 800 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.wavelength") == 800 + assert f_saved[0].get("parameter.variable") == "aod" + assert f_saved[0].get("metadata.shortName") == "aod" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wavelength_bounds_roundtrip(fl_type): + """Set parameter.wavelength_bounds, write GRIB, verify raw GRIB keys.""" + ds_ori, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set wavelength with bounds + f2 = f.set({ + "parameter.wavelength": 625, + "parameter.wavelength_bounds": (500, 750), + "parameter.wavelength_units": "nm", + }) + assert f2.get("parameter.wavelength") == 625 + assert f2.get("parameter.wavelength_bounds") == (500, 750) + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.wavelength") == 625 + assert f_saved[0].get("parameter.wavelength_bounds") == 
(500, 750) + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_direction_roundtrip(fl_type): + """Set parameter.wave_direction_index, write GRIB, read back as 1-based directionNumber.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set a new direction index (0-based) + f2 = f.set({ + "parameter.wave_direction_index": 3, + }) + assert f2.get("parameter.wave_direction_index") == 3 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # 0-based index 3 should be stored as 1-based directionNumber=4 + assert f_saved[0].get("parameter.wave_direction_index") == 3 + assert f_saved[0].get("metadata.directionNumber") == 4 + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_frequency_roundtrip(fl_type): + """Set parameter.wave_frequency_index, write GRIB, read back as 1-based frequencyNumber.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set a new frequency index (0-based) + f2 = f.set({ + "parameter.wave_frequency_index": 2, + }) + assert f2.get("parameter.wave_frequency_index") == 2 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # 0-based index 2 should be stored as 1-based frequencyNumber=3 + assert f_saved[0].get("parameter.wave_frequency_index") == 2 + assert f_saved[0].get("metadata.frequencyNumber") == 3 + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_both_indices_roundtrip(fl_type): + """Set both wave_direction_index and wave_frequency_index, write, read back.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + f2 = f.set({ + "parameter.wave_direction_index": 4, + "parameter.wave_frequency_index": 7, + }) + assert 
f2.get("parameter.wave_direction_index") == 4 + assert f2.get("parameter.wave_frequency_index") == 7 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # direction: 0-based 4 → 1-based 5 + assert f_saved[0].get("parameter.wave_direction_index") == 4 + assert f_saved[0].get("metadata.directionNumber") == 5 + # frequency: 0-based 7 → 1-based 8 + assert f_saved[0].get("parameter.wave_frequency_index") == 7 + assert f_saved[0].get("metadata.frequencyNumber") == 8 From 005b045eede9c428ca63ea2495f81372ef82cecf Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Wed, 10 Jun 2026 23:23:50 +0200 Subject: [PATCH 20/21] Notebooks and docs updated --- docs/source/concepts/xarray/dim.rst | 12 +- .../xr_engine/xarray_engine_chem.ipynb | 164 +++++++++--------- .../xarray_engine_wave_spectra.ipynb | 60 +++---- 3 files changed, 113 insertions(+), 123 deletions(-) diff --git a/docs/source/concepts/xarray/dim.rst b/docs/source/concepts/xarray/dim.rst index 772269fb0..ded28cb33 100644 --- a/docs/source/concepts/xarray/dim.rst +++ b/docs/source/concepts/xarray/dim.rst @@ -39,13 +39,13 @@ The predefined dimensions are based on the ``dim_roles``, which is a mapping bet - Aerosol type, or chemical or physical constituent type - "parameter.chem" * - "wavelength" - - Optical wavelength in nanometers (e.g. for aerosol optical depth) + - Optical wavelength (e.g. for aerosol optical depth) - "parameter.wavelength" * - "wave_direction" - - Wave direction in degrees (for 2D wave spectra) + - Wave direction (for 2D wave spectra) - "parameter.wave_direction" * - "wave_frequency" - - Wave frequency in Hz (for 2D wave spectra) + - Wave frequency (for 2D wave spectra) - "parameter.wave_frequency" * - "forecast_reference_time" - Forecast reference time (base datetime). Can be a single metadata key, or a list/tuple of two metadata keys representing the "date" and "time" parts of the forecast reference time. 
Alternatively, it can be a dict with "date" and "time" keys specifying the corresponding metadata keys. Used when ``"forecast_reference_time"`` is in ``time_dims``. @@ -161,7 +161,7 @@ The following dimensions are applicable for chemical and optical parameters (see - ``"chem"``: Indicates an aerosol type, chemical specie, etc. (for example, for the parameter representing *mass mixing ratio*, the coordinates can be ``"CO"``, ``"O3"``, etc.). -- ``"wavelength"`` (*nm*): Wavelength at which the optical parameter is measured, modelled or reported. +- ``"wavelength"``: Wavelength at which the optical parameter is measured, modelled or reported. The following notebook illustrates the use of the above dimensions in a CAMS dataset containing chemical and optical parameters: @@ -175,9 +175,9 @@ The following notebook illustrates the use of the above dimensions in a CAMS dat The following dimensions are applicable for 2D wave spectra parameters: -- ``"wave_direction"`` (*degree*): Direction from which the waves propagate, expressed in degrees clockwise from true north. +- ``"wave_direction"``: Direction from which the waves propagate, expressed in degrees clockwise from true north. -- ``"wave_frequency"`` (*s-1*): Wave frequency corresponding to the spectral component. +- ``"wave_frequency"``: Wave frequency corresponding to the spectral component. 
The following notebook presents an example 2D wave spectra dataset: diff --git a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb index 81c8dc7d3..3df5927e8 100644 --- a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb +++ b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb @@ -27,7 +27,7 @@ "id": "c1d2e3f4", "metadata": {}, "source": [ - "### Chemical dimension\n", + "### Chemical constituent dimension\n", "\n", "Parameters which involve aerosol type or chemical constituent type should have a relevant metadata exposed via\n", "earthkit's `parameter.chem` and `parameter.chem_long_name` metadata keys. To illustrate this, consider\n", @@ -44,7 +44,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " " + " " ] } ], @@ -80,7 +80,7 @@ " \n", " \n", " parameter.variable\n", - " parameter.chem_variable\n", + " parameter.chem\n", " parameter.chem_long_name\n", " \n", " \n", @@ -108,10 +108,10 @@ "" ], "text/plain": [ - " parameter.variable parameter.chem_variable parameter.chem_long_name\n", - "0 mass_mixrat CO Carbon monoxide\n", - "1 mass_mixrat HCHO Formaldehyde\n", - "2 mass_mixrat O3 Ozone" + " parameter.variable parameter.chem parameter.chem_long_name\n", + "0 mass_mixrat CO Carbon monoxide\n", + "1 mass_mixrat HCHO Formaldehyde\n", + "2 mass_mixrat O3 Ozone" ] }, "execution_count": 3, @@ -267,8 +267,8 @@ "
    \n", "
    \n", "\n", - "\n", - "\n", + "\n", + "\n", "
    \n", "\n", "\n", - "\n", + "\n", "
    variablemass_mixrat
    standard_namemass_fraction_of_carbon_monoxide_in_air
    long_nameMass mixing ratio
    unitsdimensionless
    chem_variableCO
    chem_long_nameCarbon monoxide
    wavelengthNone
    wave_directionNone
    wave_frequencyNone
    variablemass_mixrat
    standard_namemass_fraction_of_carbon_monoxide_in_air
    long_nameMass mixing ratio
    unitsdimensionless
    chemCO
    chem_long_nameCarbon monoxide
    \n", "
    \n", " \n", - "\n", - "\n", + "\n", + "\n", "
    \n", "
    <xarray.Dataset> Size: 2kB\n",
    -       "Dimensions:        (chem_variable: 3, latitude: 7, longitude: 12)\n",
    +       "Dimensions:      (chem: 3, latitude: 7, longitude: 12)\n",
            "Coordinates:\n",
    -       "  * chem_variable  (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    -       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    -       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
    +       "  * chem         (chem) <U4 48B 'CO' 'HCHO' 'O3'\n",
    +       "  * latitude     (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude    (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
            "Data variables:\n",
    -       "    mass_mixrat    (chem_variable, latitude, longitude) float64 2kB ...\n",
    +       "    mass_mixrat  (chem, latitude, longitude) float64 2kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " institution: ECMWF
    " ], "text/plain": [ " Size: 2kB\n", - "Dimensions: (chem_variable: 3, latitude: 7, longitude: 12)\n", + "Dimensions: (chem: 3, latitude: 7, longitude: 12)\n", "Coordinates:\n", - " * chem_variable (chem_variable)
    <xarray.Dataset> Size: 2kB\n",
    -       "Dimensions:         (chem_variable: 3, latitude: 7, longitude: 12)\n",
    +       "Dimensions:         (chem: 3, latitude: 7, longitude: 12)\n",
            "Coordinates:\n",
    -       "  * chem_variable   (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    -       "    chem_long_name  (chem_variable) <U15 180B ...\n",
    +       "  * chem            (chem) <U4 48B 'CO' 'HCHO' 'O3'\n",
    +       "    chem_long_name  (chem) <U15 180B ...\n",
            "  * latitude        (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
            "  * longitude       (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
            "Data variables:\n",
    -       "    mass_mixrat     (chem_variable, latitude, longitude) float64 2kB ...\n",
    +       "    mass_mixrat     (chem, latitude, longitude) float64 2kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " institution: ECMWF" ], "text/plain": [ " Size: 2kB\n", - "Dimensions: (chem_variable: 3, latitude: 7, longitude: 12)\n", + "Dimensions: (chem: 3, latitude: 7, longitude: 12)\n", "Coordinates:\n", - " * chem_variable (chem_variable)
    <xarray.DataArray 'chem_long_name' (chem_variable: 3)> Size: 180B\n",
    +       "
    <xarray.DataArray 'chem_long_name' (chem: 3)> Size: 180B\n",
            "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='<U15')\n",
            "Coordinates:\n",
    -       "  * chem_variable   (chem_variable) <U4 48B 'CO' 'HCHO' 'O3'\n",
    -       "    chem_long_name  (chem_variable) <U15 180B 'Carbon monoxide' ... 'Ozone'
    " + " * chem (chem) <U4 48B 'CO' 'HCHO' 'O3'\n", + " chem_long_name (chem) <U15 180B 'Carbon monoxide' 'Formaldehyde' 'Ozone'
    " ], "text/plain": [ - " Size: 180B\n", + " Size: 180B\n", "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='\n", " parameter.variable\n", " parameter.long_name\n", - " parameter.chem_variable\n", + " parameter.chem\n", " parameter.chem_long_name\n", " parameter.wavelength\n", " \n", @@ -2253,11 +2253,11 @@ "" ], "text/plain": [ - " parameter.variable parameter.long_name parameter.chem_variable \\\n", - "0 aod Aerosol optical depth aer_total \n", - "1 aod Aerosol optical depth aer_total \n", - "2 aod Aerosol optical depth aer_sm \n", - "3 aod Aerosol optical depth aer_sm \n", + " parameter.variable parameter.long_name parameter.chem \\\n", + "0 aod Aerosol optical depth aer_total \n", + "1 aod Aerosol optical depth aer_total \n", + "2 aod Aerosol optical depth aer_sm \n", + "3 aod Aerosol optical depth aer_sm \n", "\n", " parameter.chem_long_name parameter.wavelength \n", "0 Total aerosol 550 \n", @@ -2277,7 +2277,7 @@ " keys=[\n", " \"parameter.variable\",\n", " \"parameter.long_name\",\n", - " \"parameter.chem_variable\",\n", + " \"parameter.chem\",\n", " \"parameter.chem_long_name\",\n", " \"parameter.wavelength\",\n", " ]\n", @@ -2428,8 +2428,8 @@ "
    \n", "
    \n", "\n", - "\n", - "\n", + "\n", + "\n", "
    \n", "\n", "\n", - "\n", + "\n", "
    variableaod
    standard_nameunknown
    long_nameAerosol optical depth
    unitsNumeric
    chem_variableaer_total
    chem_long_nameTotal aerosol
    wavelength550
    wave_directionNone
    wave_frequencyNone
    variableaod
    standard_nameunknown
    long_nameAerosol optical depth
    unitsNumeric
    chemaer_total
    chem_long_nameTotal aerosol
    wavelength550
    wavelength_boundsNone
    wavelength_unitsnanometer
    \n", "
    \n", " \n", - "\n", - "\n", + "\n", + "\n", "
    \n", "
    <xarray.Dataset> Size: 3kB\n",
    -       "Dimensions:        (chem_variable: 2, wavelength: 2, latitude: 7, longitude: 12)\n",
    +       "Dimensions:     (chem: 2, wavelength: 2, latitude: 7, longitude: 12)\n",
            "Coordinates:\n",
    -       "  * chem_variable  (chem_variable) <U9 72B 'aer_sm' 'aer_total'\n",
    -       "  * wavelength     (wavelength) int64 16B 550 800\n",
    -       "  * latitude       (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    -       "  * longitude      (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
    +       "  * chem        (chem) <U9 72B 'aer_sm' 'aer_total'\n",
    +       "  * wavelength  (wavelength) int64 16B 550 800\n",
    +       "  * latitude    (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
    +       "  * longitude   (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n",
            "Data variables:\n",
    -       "    aod            (chem_variable, wavelength, latitude, longitude) float64 3kB ...\n",
    +       "    aod         (chem, wavelength, latitude, longitude) float64 3kB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    " + " institution: ECMWF
    " ], "text/plain": [ " Size: 3kB\n", - "Dimensions: (chem_variable: 2, wavelength: 2, latitude: 7, longitude: 12)\n", + "Dimensions: (chem: 2, wavelength: 2, latitude: 7, longitude: 12)\n", "Coordinates:\n", - " * chem_variable (chem_variable) \n", "
    \n", "\n", - "\n", - "\n", + "\n", + "\n", "
    \n", "\n", "\n", - "\n", + "\n", "
    variable2dfd
    standard_nameunknown
    long_name2D wave spectra (single)
    unitsmeter ** 2 * second / radian
    chem_variableNone
    chem_long_nameNone
    wavelengthNone
    wave_direction5.0
    wave_frequency0.034523
    variable2dfd
    standard_nameunknown
    long_name2D wave spectra (single)
    unitsmeter ** 2 * second / radian
    wave_direction5.0
    wave_direction_index0
    wave_direction_bounds(0.0, 10.0)
    wave_direction_unitsdegree
    wave_frequency0.034523
    wave_frequency_index0
    wave_frequency_bounds(0.032917, 0.036208)
    wave_frequency_units1 / second
    \n", "
    \n", " \n", - "\n", - "\n", + "\n", + "\n", "
    \n", "
    <xarray.Dataset> Size: 1MB\n",
    -       "Dimensions:                  (member: 1, wave_direction: 36,\n",
    -       "                              wave_frequency: 29, forecast_reference_time: 2,\n",
    -       "                              step: 1, level: 1, level_type: 1, latitude: 7,\n",
    +       "Dimensions:                  (wave_direction: 36, wave_frequency: 29,\n",
    +       "                              forecast_reference_time: 2, latitude: 7,\n",
            "                              longitude: 12)\n",
            "Coordinates:\n",
    -       "  * member                   (member) <U1 4B '0'\n",
            "  * wave_direction           (wave_direction) float64 288B 5.0 15.0 ... 355.0\n",
            "  * wave_frequency           (wave_frequency) float64 232B 0.03452 ... 0.4979\n",
            "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
    -       "  * step                     (step) timedelta64[ns] 8B 00:00:00\n",
    -       "  * level                    (level) int64 8B 0\n",
    -       "  * level_type               (level_type) <U8 32B 'mean_sea'\n",
            "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
            "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
            "Data variables:\n",
    -       "    2dfd                     (member, wave_direction, wave_frequency, forecast_reference_time, step, level, level_type, latitude, longitude) float64 1MB ...\n",
    +       "    2dfd                     (wave_direction, wave_frequency, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
            "Attributes:\n",
            "    Conventions:  CF-1.8\n",
    -       "    institution:  ECMWF
    • 2dfd
      (wave_direction, wave_frequency, forecast_reference_time, latitude, longitude)
      float64
      ...
      standard_name :
      unknown
      long_name :
      2D wave spectra (single)
      units :
      meter ** 2 * second / radian
      level_type :
      mean_sea
      _earthkit :
      {'message': b'GRIB\\x00\\x01\\xb6\\x01\\x00\\x01h\\x8cbj\\xff\\x80\\xfbf\\x00\\x00\\x19\\x0c\\x0f\\x00\\x00\\x01\\x00\\x00\\n\\x00\\x00\\x00\\x15\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\r\\x01\\x02\\x04\\x150001\\x00\\x00\\x01\\x01$\\x1d\\x00\\x00\\x03\\xe8\\x00\\x0fB@\\x03\\xff\\xff\\xff\\xff\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x13\\x88\\x00\\x00:\\x98\\x00\\x00a\\xa8\\x00\\x00\\x88\\xb8\\x00\\x00\\xaf\\xc8\\x00\\x00\\xd6\\xd8\\x00\\x00\\xfd\\xe8\\x00\\x01$\\xf8\\x00\\x01L\\x08\\x00\\x01s\\x18\\x00\\x01\\x9a(\\x00\\x01\\xc18\\x00\\x01\\xe8H\\x00\\x02\\x0fX\\x00\\x026h\\x00\\x02]x\\x00\\x02\\x84\\x88\\x00\\x02\\xab\\x98\\x00\\x02\\xd2\\xa8\\x00\\x02\\xf9\\xb8\\x00\\x03 \\xc8\\x00\\x03G\\xd8\\x00\\x03n\\xe8\\x00\\x03\\x95\\xf8\\x00\\x03\\xbd\\x08\\x00\\x03\\xe4\\x18\\x00\\x04\\x0b(\\x00\\x0428\\x00\\x04YH\\x00\\x04\\x80X\\x00\\x04\\xa7h\\x00\\x04\\xcex\\x00\\x04\\xf5\\x88\\x00\\x05\\x1c\\x98\\x00\\x05C\\xa8\\x00\\x05j\\xb8\\x00\\x00\\x86\\xdb\\x00\\x00\\x94W\\x00\\x00\\xa3,\\x00\\x00\\xb3~\\x00\\x00\\xc5q\\x00\\x00\\xd9/\\x00\\x00\\xee\\xe7\\x00\\x01\\x06\\xcb\\x00\\x01!\\x12\\x00\\x01=\\xfb\\x00\\x01]\\xc7\\x00\\x01\\x80\\xc1\\x00\\x01\\xa7;\\x00\\x01\\xd1\\x8e\\x00\\x02\\x00\\x1c\\x00\\x023R\\x00\\x02k\\xa7\\x00\\x02\\xa9\\x9e\\x00\\x02\\xed\\xc7\\x00\\x038\\xc2\\x00\\x03\\x8b<\\x00\\x03\\xe5\\xf5\\x00\\x04I\\xc0\\x00\\x04\\xb7\\x87\\x00\\x050G\\x00\\x05\\xb5\\x1b\\x00\\x06G8\\x00\\x06\\xe7\\xf1\\x00\\x07\\x98\\xbc\\x00\\x00 \\x00\\xff\\x00\\x00\\x0c\\x00\\x07\\x01_\\x90\\x00\\x00\\x00\\x80\\x81_\\x90\\x05\\t\\x10u0u0\\x00\\x00\\x00\\x00\\x00\\x00\\x00"\\x04\\x80\\x0b\\xc1SY\\x1a\\t\\x9cn5\\x1a\\x0b\\xc6\\xc3AX\\xfcL5\\x1b\\x0c\\xc6\\xc3A\\xa0\\x00h$\\x19\\x807777', 'bitsPerValue': 9}
      [175392 values with dtype=float64]
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", - "Dimensions: (member: 1, wave_direction: 36,\n", - " wave_frequency: 29, forecast_reference_time: 2,\n", - " step: 1, level: 1, level_type: 1, latitude: 7,\n", + "Dimensions: (wave_direction: 36, wave_frequency: 29,\n", + " forecast_reference_time: 2, latitude: 7,\n", " longitude: 12)\n", "Coordinates:\n", - " * member (member) Date: Thu, 11 Jun 2026 19:10:03 +0200 Subject: [PATCH 21/21] Final fixes --- .../data/field/component/component.py | 26 ++------------- .../data/field/component/parameter.py | 33 +++++++++++-------- tests/field/test_parameter_component.py | 18 +++------- 3 files changed, 26 insertions(+), 51 deletions(-) diff --git a/src/earthkit/data/field/component/component.py b/src/earthkit/data/field/component/component.py index a76da5593..9e6bfda87 100644 --- a/src/earthkit/data/field/component/component.py +++ b/src/earthkit/data/field/component/component.py @@ -6,7 +6,8 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
# -import inspect + + from abc import ABCMeta, abstractmethod from functools import wraps @@ -222,13 +223,6 @@ def __setstate__(self, state): class SimpleFieldComponent(FieldComponent): _KEYS = tuple() _ALIASES = dict() - # keys accepted by a class __init__(); set by __init_subclass__() for every subclass - _ALLOWED_CREATE_KEYS = None - - def __init_subclass__(cls): - super().__init_subclass__() - sig = inspect.signature(cls.__init__) - cls._ALLOWED_CREATE_KEYS = tuple(key for key in sig.parameters.keys() if key not in {"self", "args", "kwargs"}) def __contains__(self, name): """Check if the key is in the component.""" @@ -321,19 +315,3 @@ def _normalise_set_kwargs(cls, *args, allowed_keys=None, **kwargs): _kwargs[k] = v return _kwargs - - @classmethod - def _create_component(cls, d: dict): - filtered_keys = {} - not_allowed_keys = {} - allowed_keys = cls._ALLOWED_CREATE_KEYS - for k, v in d.items(): - if k in allowed_keys: - filtered_keys[k] = v - elif v is not None: - not_allowed_keys[k] = v - - if not_allowed_keys: - raise ValueError(f"Cannot create {cls.__name__} with {not_allowed_keys}. 
Allowed keys are: {allowed_keys}") - - return cls(**filtered_keys) diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index 7085629b9..681f5a56d 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -322,11 +322,11 @@ def create_parameter(d: dict) -> ParameterBase: d, allowed_keys=( "variable", + "standard_name", + "long_name", "units", "chem", "chem_long_name", - "standard_name", - "long_name", "wavelength", "wavelength_bounds", "wavelength_units", @@ -343,9 +343,14 @@ def create_parameter(d: dict) -> ParameterBase: if "variable" not in d1: raise ValueError("Cannot create Parameter without variable") - has_chem = d1.get("chem") is not None - has_wavelength = d1.get("wavelength") is not None - has_wave_spectra = d1.get("wave_direction") is not None or d1.get("wave_frequency") is not None + has_chem = d1.get("chem") is not None or d1.get("chem_long_name") is not None + has_wavelength = d1.get("wavelength") is not None or d1.get("wavelength_bounds") is not None + has_wave_spectra = ( + d1.get("wave_direction") is not None + or d1.get("wave_direction_index") is not None + or d1.get("wave_frequency") is not None + or d1.get("wave_frequency_index") is not None + ) if has_chem and has_wavelength: cls = ChemicalOpticalParameter @@ -358,7 +363,7 @@ def create_parameter(d: dict) -> ParameterBase: else: cls = Parameter - return cls._create_component(d1) + return cls(**d1) class EmptyParameter(ParameterBase): @@ -753,7 +758,7 @@ def __init__( chem: str = None, chem_long_name: str = None, ) -> None: - Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._chem = chem self._chem_long_name = chem_long_name @@ -767,7 +772,7 @@ def chem_long_name(self) -> Optional[str]: def to_dict(self): """Return a 
dictionary representation of the chemical parameter.""" - d = Parameter.to_dict(self) + d = super().to_dict() d["chem"] = self._chem d["chem_long_name"] = self._chem_long_name return d @@ -808,7 +813,7 @@ def __init__( wavelength_bounds: Optional[tuple[int, int]] = None, wavelength_units: Union[str, "Units"] = None, ) -> None: - Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wavelength = wavelength self._wavelength_bounds = wavelength_bounds self._wavelength_units = Units.from_any(wavelength_units) @@ -850,7 +855,7 @@ def wavelength_units(self) -> Optional["Units"]: def to_dict(self): """Return a dictionary representation of the optical parameter.""" - d = Parameter.to_dict(self) + d = super().to_dict() d["wavelength"] = self._wavelength d["wavelength_bounds"] = self._wavelength_bounds d["wavelength_units"] = str(self._wavelength_units) @@ -898,7 +903,7 @@ def __init__( wavelength_bounds: Optional[tuple[int, int]] = None, wavelength_units: Union[str, "Units"] = None, ) -> None: - Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, units=units) + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._chem = chem self._chem_long_name = chem_long_name self._wavelength = wavelength @@ -907,7 +912,7 @@ def __init__( def to_dict(self): """Return a dictionary representation of the chemical-optical parameter.""" - d = Parameter.to_dict(self) + d = super().to_dict() d["chem"] = self._chem d["chem_long_name"] = self._chem_long_name d["wavelength"] = self._wavelength @@ -967,7 +972,7 @@ def __init__( wave_frequency_bounds: Optional[tuple[float, float]] = None, wave_frequency_units: Union[str, "Units"] = None, ) -> None: - Parameter.__init__(self, variable=variable, standard_name=standard_name, long_name=long_name, 
units=units) + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) self._wave_direction = wave_direction self._wave_direction_index = wave_direction_index self._wave_direction_bounds = wave_direction_bounds @@ -1057,7 +1062,7 @@ def wave_frequency_units(self) -> Optional["Units"]: def to_dict(self): """Return a dictionary representation of the wave spectra parameter.""" - d = Parameter.to_dict(self) + d = super().to_dict() d["wave_direction"] = self._wave_direction d["wave_direction_index"] = self._wave_direction_index d["wave_direction_bounds"] = self._wave_direction_bounds diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index cc8f153be..ca87221a2 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -71,14 +71,6 @@ def test_parameter_component_alias_1(): "wavelength": 550, "wavelength_bounds": None, "wavelength_units": "nm", - "wave_direction": None, - "wave_direction_index": None, - "wave_direction_bounds": None, - "wave_direction_units": None, - "wave_frequency": None, - "wave_frequency_index": None, - "wave_frequency_bounds": None, - "wave_frequency_units": None, }, { "variable": "aod", @@ -110,7 +102,7 @@ def test_parameter_component_from_dict_ok(input_d, ref): if isinstance(input_d, list): for d in input_d: - r = Parameter.from_dict(d) + r = create_parameter(d) assert r.variable() == ref["variable"] assert r.param() == ref["param"] @@ -273,10 +265,10 @@ def test_parameter_component_set(input_d, ref): assert r.units() == "Pa" -def test_parameter_component_wavelength_tuple(): - """Test wavelength as a tuple (wavelength range).""" - p = OpticalParameter(variable="aod", wavelength=(400, 700)) - assert p.wavelength() == (400, 700) +def test_parameter_component_wavelength(): + """Test wavelength bounds (wavelength range).""" + p = create_parameter(dict(variable="aod", wavelength_bounds=(400, 700))) + assert 
p.wavelength_bounds() == (400, 700) def test_parameter_component_create_parameter_regular():