diff --git a/docs/source/concepts/xarray/dim.rst b/docs/source/concepts/xarray/dim.rst index 808dfaa2..ded28cb3 100644 --- a/docs/source/concepts/xarray/dim.rst +++ b/docs/source/concepts/xarray/dim.rst @@ -15,6 +15,12 @@ Predefined dimensions and dimension roles By default, the following predefined dimensions are generated, in the following order: - ensemble forecast member dimension +- aerosol type, or atmospheric chemical or physical constituent type +  (discrete dimension, applicable for chemical parameters; see details :ref:`here <xr_chem_optical_dims>`) +- optical depth or wavelength range or colour channel (applicable for optical parameters, +  simulated satellite images, etc.; see details :ref:`here <xr_chem_optical_dims>`) +- direction and frequency of sea waves (applicable for 2D wave spectra parameters; +  see details :ref:`here <xr_wave_spectra_dims>`) - temporal dimensions, controlled by ``time_dims`` (see details :ref:`here `) - vertical dimensions, controlled by ``level_dim_mode`` (see details :ref:`here `) @@ -29,6 +35,18 @@ The predefined dimensions are based on the ``dim_roles``, which is a mapping bet * - "member" - Ensemble forecast member - "ensemble.member" + * - "chem" + - Aerosol type, or chemical or physical constituent type + - "parameter.chem" + * - "wavelength" + - Optical wavelength (e.g. for aerosol optical depth) + - "parameter.wavelength" + * - "wave_direction" + - Wave direction (for 2D wave spectra) + - "parameter.wave_direction" + * - "wave_frequency" + - Wave frequency (for 2D wave spectra) + - "parameter.wave_frequency" * - "forecast_reference_time" - Forecast reference time (base datetime). Can be a single metadata key, or a list/tuple of two metadata keys representing the "date" and "time" parts of the forecast reference time. Alternatively, it can be a dict with "date" and "time" keys specifying the corresponding metadata keys. Used when ``"forecast_reference_time"`` is in ``time_dims``. 
- "time.forecast_reference_time" @@ -134,6 +152,38 @@ The following example demonstrates the vertical dimensions modes: - :ref:`/how-tos/xr_engine/xarray_engine_level.ipynb` +.. _xr_chem_optical_dims: + +Chemical and optical dimensions +------------------------------------------------ + +The following dimensions are applicable for chemical and optical parameters (see for example CAMS datasets): + +- ``"chem"``: Indicates an aerosol type, chemical species, etc. (for example, for the parameter representing *mass mixing ratio*, the coordinates can be ``"CO"``, ``"O3"``, etc.). + +- ``"wavelength"``: Wavelength at which the optical parameter is measured, modelled or reported. + +The following notebook illustrates the use of the above dimensions in a CAMS dataset containing chemical and optical parameters: + +- :ref:`/how-tos/xr_engine/xarray_engine_chem.ipynb` + + +.. _xr_wave_spectra_dims: + +2D wave spectra dimensions +------------------------------------------------ + +The following dimensions are applicable for 2D wave spectra parameters: + +- ``"wave_direction"``: Direction from which the waves propagate, expressed in degrees clockwise from true north. + +- ``"wave_frequency"``: Wave frequency corresponding to the spectral component. + +The following notebook presents an example 2D wave spectra dataset: + +- :ref:`/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb` + + .. 
_xr_squeeze_and_ensure_dims: diff --git a/docs/source/how-tos/xr_engine/index.rst b/docs/source/how-tos/xr_engine/index.rst index 7ac54176..2b1ef518 100644 --- a/docs/source/how-tos/xr_engine/index.rst +++ b/docs/source/how-tos/xr_engine/index.rst @@ -13,6 +13,8 @@ Xarray engine xarray_engine_seasonal.ipynb xarray_engine_level.ipynb xarray_engine_ensemble.ipynb + xarray_engine_chem.ipynb + xarray_engine_wave_spectra.ipynb xarray_engine_variable_key.ipynb xarray_engine_mono_variable.ipynb xarray_engine_mono_variable_remapping.ipynb diff --git a/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb new file mode 100644 index 00000000..3df5927e --- /dev/null +++ b/docs/source/how-tos/xr_engine/xarray_engine_chem.ipynb @@ -0,0 +1,3170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1b2c3d4", + "metadata": {}, + "source": [ + "# Xarray engine: chemical and optical dimensions\n", + "\n", + "This notebook demonstrates how earthkit-data deals with parameters which involve:\n", + "- a type of aerosol or chemical constituent (for example, CAMS atmospheric composition data),\n", + "- radiation wavelength (for example, CAMS aerosol optical depth data)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b1c2d3e4", + "metadata": {}, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "c1d2e3f4", + "metadata": {}, + "source": [ + "### Chemical constituent dimension\n", + "\n", + "Parameters which involve aerosol type or chemical constituent type should have the relevant metadata exposed via\n", + "earthkit's `parameter.chem` and `parameter.chem_long_name` metadata keys. 
To illustrate this, consider\n", + "the following CAMS atmospheric composition dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1e2f3a4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + } + ], + "source": [ + "fl_chem = ekd.from_source(\"sample\", \"chem-cams.grib\").to_fieldlist()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2dfee2c8-08d4-4359-b77a-5d5ec7f32f68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variableparameter.chemparameter.chem_long_name
0mass_mixratCOCarbon monoxide
1mass_mixratHCHOFormaldehyde
2mass_mixratO3Ozone
\n", + "
" + ], + "text/plain": [ + " parameter.variable parameter.chem parameter.chem_long_name\n", + "0 mass_mixrat CO Carbon monoxide\n", + "1 mass_mixrat HCHO Formaldehyde\n", + "2 mass_mixrat O3 Ozone" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_chem.ls(keys=[\"parameter.variable\", \"parameter.chem\", \"parameter.chem_long_name\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a0bb00f9-4ede-4347-a297-4fcdd867be2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Field
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
number_of_values84
array_typendarray
array_dtypefloat64
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
variablemass_mixrat
standard_namemass_fraction_of_carbon_monoxide_in_air
long_nameMass mixing ratio
unitsdimensionless
chemCO
chem_long_nameCarbon monoxide
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
valid_datetime2011-09-29 00:00:00
base_datetime2011-09-29 00:00:00
step0:00:00
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
level1
layerNone
level_typehybrid
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
memberNone
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
grid_specNone
grid_typeregular_ll
shape(7, 12)
area(90.0, 0.0, -90.0, 330.0)
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + "Field(mass_mixrat, 2011-09-29 00:00:00, 2011-09-29 00:00:00, 0:00:00, 1, hybrid, None, regular_ll)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_chem[0]" + ] + }, + { + "cell_type": "markdown", + "id": "843a0476-db89-4c2d-a8b8-745ed7f927b0", + "metadata": {}, + "source": [ + "The conversion into Xarray constructs the ``\"chem\"`` dimension automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8cebce0e-64f2-402c-bba5-75dbfa8d99fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2kB\n",
+       "Dimensions:      (chem: 3, latitude: 7, longitude: 12)\n",
+       "Coordinates:\n",
+       "  * chem         (chem) <U4 48B 'CO' 'HCHO' 'O3'\n",
+       "  * latitude     (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
+       "  * longitude    (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
+       "Data variables:\n",
+       "    mass_mixrat  (chem, latitude, longitude) float64 2kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (chem: 3, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem (chem) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2kB\n",
+       "Dimensions:         (chem: 3, latitude: 7, longitude: 12)\n",
+       "Coordinates:\n",
+       "  * chem            (chem) <U4 48B 'CO' 'HCHO' 'O3'\n",
+       "    chem_long_name  (chem) <U15 180B ...\n",
+       "  * latitude        (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
+       "  * longitude       (longitude) float64 96B 0.0 30.0 60.0 ... 270.0 300.0 330.0\n",
+       "Data variables:\n",
+       "    mass_mixrat     (chem, latitude, longitude) float64 2kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (chem: 3, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem (chem) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'chem_long_name' (chem: 3)> Size: 180B\n",
+       "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='<U15')\n",
+       "Coordinates:\n",
+       "  * chem            (chem) <U4 48B 'CO' 'HCHO' 'O3'\n",
+       "    chem_long_name  (chem) <U15 180B 'Carbon monoxide' 'Formaldehyde' 'Ozone'
" + ], + "text/plain": [ + " Size: 180B\n", + "array(['Carbon monoxide', 'Formaldehyde', 'Ozone'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variableparameter.long_nameparameter.chemparameter.chem_long_nameparameter.wavelength
0aodAerosol optical depthaer_totalTotal aerosol550
1aodAerosol optical depthaer_totalTotal aerosol800
2aodAerosol optical depthaer_smAerosol small mode550
3aodAerosol optical depthaer_smAerosol small mode800
\n", + "" + ], + "text/plain": [ + " parameter.variable parameter.long_name parameter.chem \\\n", + "0 aod Aerosol optical depth aer_total \n", + "1 aod Aerosol optical depth aer_total \n", + "2 aod Aerosol optical depth aer_sm \n", + "3 aod Aerosol optical depth aer_sm \n", + "\n", + " parameter.chem_long_name parameter.wavelength \n", + "0 Total aerosol 550 \n", + "1 Total aerosol 800 \n", + "2 Aerosol small mode 550 \n", + "3 Aerosol small mode 800 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_opt = ekd.from_source(\"sample\", \"optical-cams.grib\").to_fieldlist()\n", + "fl_opt.ls(\n", + " keys=[\n", + " \"parameter.variable\",\n", + " \"parameter.long_name\",\n", + " \"parameter.chem\",\n", + " \"parameter.chem_long_name\",\n", + " \"parameter.wavelength\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9609a38a-30c6-41ea-b24d-8432b2e72dae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Field
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
number_of_values84
array_typendarray
array_dtypefloat64
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
variableaod
standard_nameunknown
long_nameAerosol optical depth
unitsNumeric
chemaer_total
chem_long_nameTotal aerosol
wavelength550
wavelength_boundsNone
wavelength_unitsnanometer
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
valid_datetime2011-09-29 00:00:00
base_datetime2011-09-29 00:00:00
step0:00:00
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
level0
layerNone
level_typesurface
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
memberNone
\n", + "
\n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "
grid_specNone
grid_typeregular_ll
shape(7, 12)
area(90.0, 0.0, -90.0, 330.0)
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + "Field(aod, 2011-09-29 00:00:00, 2011-09-29 00:00:00, 0:00:00, 0, surface, None, regular_ll)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_opt[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "k1l2m3n4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 3kB\n",
+       "Dimensions:     (chem: 2, wavelength: 2, latitude: 7, longitude: 12)\n",
+       "Coordinates:\n",
+       "  * chem        (chem) <U9 72B 'aer_sm' 'aer_total'\n",
+       "  * wavelength  (wavelength) int64 16B 550 800\n",
+       "  * latitude    (latitude) float64 56B 90.0 60.0 30.0 0.0 -30.0 -60.0 -90.0\n",
+       "  * longitude   (longitude) float64 96B 0.0 30.0 60.0 90.0 ... 270.0 300.0 330.0\n",
+       "Data variables:\n",
+       "    aod         (chem, wavelength, latitude, longitude) float64 3kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 3kB\n", + "Dimensions: (chem: 2, wavelength: 2, latitude: 7, longitude: 12)\n", + "Coordinates:\n", + " * chem (chem) \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
metadata.directionNumbermetadata.frequencyNumber
011
121
231
341
451
.........
20833229
20843329
20853429
20863529
20873629
\n", - "

2088 rows × 2 columns

\n", - "" - ], - "text/plain": [ - " metadata.directionNumber metadata.frequencyNumber\n", - "0 1 1\n", - "1 2 1\n", - "2 3 1\n", - "3 4 1\n", - "4 5 1\n", - "... ... ...\n", - "2083 32 29\n", - "2084 33 29\n", - "2085 34 29\n", - "2086 35 29\n", - "2087 36 29\n", - "\n", - "[2088 rows x 2 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_fl.ls(keys=[\"metadata.directionNumber\", \"metadata.frequencyNumber\"])" - ] - }, - { - "cell_type": "raw", - "id": "4e822233-94fc-4609-a3ce-68a2f0bbaf29", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "To represent this structure in Xarray, the predefined dimensions of the\n", - "Xarray engine must therefore be complemented with dimensions derived\n", - "from the metadata keys ``\"directionNumber\"`` and ``\"frequencyNumber\"`` when calling :py:meth:`~earthkit.data.indexing.xarray.XarrayMixIn.to_xarray`" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "34d1814b-c9f4-450f-b868-1edb5d7c6382", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 1MB\n",
-       "Dimensions:                  (directionNumber: 36, frequencyNumber: 29,\n",
-       "                              forecast_reference_time: 2, latitude: 7,\n",
-       "                              longitude: 12)\n",
-       "Coordinates:\n",
-       "  * directionNumber          (directionNumber) int64 288B 1 2 3 4 ... 34 35 36\n",
-       "  * frequencyNumber          (frequencyNumber) int64 232B 1 2 3 4 ... 27 28 29\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
-       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
-       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
-       "Data variables:\n",
-       "    2dfd                     (directionNumber, frequencyNumber, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 1MB\n", - "Dimensions: (directionNumber: 36, frequencyNumber: 29,\n", - " forecast_reference_time: 2, latitude: 7,\n", - " longitude: 12)\n", - "Coordinates:\n", - " * directionNumber (directionNumber) int64 288B 1 2 3 4 ... 34 35 36\n", - " * frequencyNumber (frequencyNumber) int64 232B 1 2 3 4 ... 27 28 29\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 202...\n", - " * latitude (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n", - " * longitude (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n", - "Data variables:\n", - " 2dfd (directionNumber, frequencyNumber, forecast_reference_time, latitude, longitude) float64 1MB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = ds_fl.to_xarray(\n", - " extra_dims=[\"metadata.directionNumber\", \"metadata.frequencyNumber\"],\n", - " add_earthkit_attrs=False,\n", - ")\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "028aeab5-8de8-43b5-adab-0446b4f3eea2", - "metadata": { - "editable": true, - "raw_mimetype": "", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "The ``extra_dims`` option also supports defining an explicit mapping\n", - "between the name of an extra dimension and the corresponding metadata\n", - "key, in a way that is conceptually similar to **dimension roles**." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ee32a7f8-933b-4447-b771-23939b0d1d4e", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 1MB\n",
-       "Dimensions:                  (d: 36, f: 29, forecast_reference_time: 2,\n",
-       "                              latitude: 7, longitude: 12)\n",
-       "Coordinates:\n",
-       "  * d                        (d) int64 288B 1 2 3 4 5 6 7 ... 31 32 33 34 35 36\n",
-       "  * f                        (f) int64 232B 1 2 3 4 5 6 7 ... 24 25 26 27 28 29\n",
-       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
-       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
-       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
-       "Data variables:\n",
-       "    2dfd                     (d, f, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.8\n",
-       "    institution:  ECMWF
" - ], - "text/plain": [ - " Size: 1MB\n", - "Dimensions: (d: 36, f: 29, forecast_reference_time: 2,\n", - " latitude: 7, longitude: 12)\n", - "Coordinates:\n", - " * d (d) int64 288B 1 2 3 4 5 6 7 ... 31 32 33 34 35 36\n", - " * f (f) int64 232B 1 2 3 4 5 6 7 ... 24 25 26 27 28 29\n", - " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 202...\n", - " * latitude (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n", - " * longitude (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n", - "Data variables:\n", - " 2dfd (d, f, forecast_reference_time, latitude, longitude) float64 1MB ...\n", - "Attributes:\n", - " Conventions: CF-1.8\n", - " institution: ECMWF" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds2 = ds_fl.to_xarray(\n", - " extra_dims=[{\"d\": \"metadata.directionNumber\"}, {\"f\": \"metadata.frequencyNumber\"}],\n", - " add_earthkit_attrs=False,\n", - ")\n", - "ds2" + "# Xarray engine: extra dimensions\n", + "\n", + "Quantiles in a probabilistic forecast" ] }, { - "cell_type": "markdown", - "id": "9a8a57b1-3e2a-4a17-88ee-34eea704601a", + "cell_type": "code", + "execution_count": 1, + "id": "1a6e355d-3fbf-4d92-b32f-a9d7e770f9db", "metadata": { "editable": true, + "scrolled": true, "slideshow": { "slide_type": "" }, "tags": [] }, + "outputs": [], "source": [ - "### Quantiles in a probabilistic forecast" + "import earthkit.data as ekd" ] }, { @@ -1487,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "111d30d9-8b3b-4732-ad5c-a9004a50cd4b", "metadata": { "editable": true, @@ -1501,12 +64,12 @@ "name": "stderr", "output_type": "stream", "text": [ - " " + " " ] } ], "source": [ - "ds_fl2 = ekd.from_source(\"sample\", \"quantiles_pd.grib\").to_fieldlist()" + "fl = ekd.from_source(\"sample\", \"quantiles_pd.grib\").to_fieldlist()" ] }, { @@ -1525,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": 
"6580667e-2ca2-4f78-a929-84e1d721c853", "metadata": {}, "outputs": [ @@ -1786,13 +349,13 @@ "17 10 10 10:10 " ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds_fl2.ls(\n", + "fl.ls(\n", " keys=[\n", " \"parameter.variable\",\n", " \"time.base_datetime\",\n", @@ -1829,7 +392,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "cd3a79c9-a994-4d0a-89d1-d7a1e0d64d1b", "metadata": { "editable": true, @@ -2400,9 +963,9 @@ " 2tp (quantile, forecast_reference_time, step, level, level_type, latitude, longitude) float64 12kB ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF
    • 2tp
      (quantile, forecast_reference_time, step, level, level_type, latitude, longitude)
      float64
      ...
      standard_name :
      unknown
      long_name :
      2 metre temperature probability
      units :
      percent
      level_type :
      surface
      [1512 values with dtype=float64]
  • Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 13kB\n", @@ -2424,19 +987,19 @@ " institution: ECMWF" ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds3 = ds_fl2.to_xarray(\n", + "ds = fl.to_xarray(\n", " squeeze=False,\n", " extra_dims=\"metadata.quantile\",\n", " drop_dims=\"member\",\n", " add_earthkit_attrs=False,\n", ")\n", - "ds3" + "ds" ] }, { @@ -2469,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "7fcc5da4-02c5-494d-8cc6-7a34491b0491", "metadata": { "editable": true, @@ -3034,7 +1597,7 @@ " 2tp (quantile, latitude, longitude) float64 672B ...\n", "Attributes:\n", " Conventions: CF-1.8\n", - " institution: ECMWF" + " institution: ECMWF" ], "text/plain": [ " Size: 836B\n", @@ -3050,34 +1613,20 @@ " institution: ECMWF" ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds4 = ds_fl2.sel({\"metadata.quantile\": \"2:3\"}).to_xarray(\n", + "ds2 = fl.sel({\"metadata.quantile\": \"2:3\"}).to_xarray(\n", " squeeze=True,\n", " ensure_dims=\"metadata.quantile\",\n", " drop_dims=\"member\",\n", " add_earthkit_attrs=False,\n", ")\n", - "ds4" + "ds2" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "daf42911-e416-406e-a2ee-cae49e7b77a3", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb b/docs/source/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb new file mode 100644 index 00000000..100e8119 --- /dev/null +++ b/docs/source/how-tos/xr_engine/xarray_engine_wave_spectra.ipynb @@ -0,0 +1,1085 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1b2c3d4", + "metadata": {}, + "source": [ + "# Xarray engine: 2D wave spectra dimensions\n", + "\n", + "This notebook demonstrates how earthkit-data manages 
parameters related to 2D wave spectra." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b1c2d3e4", + "metadata": {}, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "f7ffd0d6-2c18-436e-b460-0d2a45ee8a34", + "metadata": {}, + "source": [ + "We analyse a 2D wave spectra product at 2025-12-15 00 UTC and 03 UTC.\n", + "\n", + "A specific feature of this dataset is that the fields are additionally\n", + "indexed by\n", + "\n", + "- `parameter.wave_direction`\n", + "\n", + "- `parameter.wave_frequency`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "80b6fb81-26b7-46f4-93a1-8972fc503842", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + } + ], + "source": [ + "fl = ekd.from_source(\"sample\", \"2d-wave-spectra_an.grib\").to_fieldlist()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "050e5e6a-7bda-478a-891c-b30997009c8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typeparameter.wave_directionparameter.wave_frequency
    02dfd2025-12-15 00:00:002025-12-15 00:00:000 days0mean_sea0regular_ll5.00.034523
    12dfd2025-12-15 00:00:002025-12-15 00:00:000 days0mean_sea0regular_ll15.00.034523
    20862dfd2025-12-15 03:00:002025-12-15 03:00:000 days0mean_sea0regular_ll345.00.497852
    20872dfd2025-12-15 03:00:002025-12-15 03:00:000 days0mean_sea0regular_ll355.00.497852
    \n", + "
    " + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2dfd 2025-12-15 00:00:00 2025-12-15 00:00:00 0 days \n", + "1 2dfd 2025-12-15 00:00:00 2025-12-15 00:00:00 0 days \n", + "2086 2dfd 2025-12-15 03:00:00 2025-12-15 03:00:00 0 days \n", + "2087 2dfd 2025-12-15 03:00:00 2025-12-15 03:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 0 mean_sea 0 regular_ll \n", + "1 0 mean_sea 0 regular_ll \n", + "2086 0 mean_sea 0 regular_ll \n", + "2087 0 mean_sea 0 regular_ll \n", + "\n", + " parameter.wave_direction parameter.wave_frequency \n", + "0 5.0 0.034523 \n", + "1 15.0 0.034523 \n", + "2086 345.0 0.497852 \n", + "2087 355.0 0.497852 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl.ls(extra_keys=[\"parameter.wave_direction\", \"parameter.wave_frequency\"]).iloc[[0, 1, -2, -1]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "353f2463-1086-43d7-8a74-3140599a1860", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Field
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "\n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    number_of_values84
    array_typendarray
    array_dtypefloat64
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    variable2dfd
    standard_nameunknown
    long_name2D wave spectra (single)
    unitsmeter ** 2 * second / radian
    wave_direction5.0
    wave_direction_index0
    wave_direction_bounds(0.0, 10.0)
    wave_direction_unitsdegree
    wave_frequency0.034523
    wave_frequency_index0
    wave_frequency_bounds(0.032917, 0.036208)
    wave_frequency_units1 / second
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    valid_datetime2025-12-15 00:00:00
    base_datetime2025-12-15 00:00:00
    step0:00:00
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    level0
    layerNone
    level_typemean_sea
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    member0
    \n", + "
    \n", + " \n", + "\n", + "\n", + "
    \n", + "\n", + "\n", + "\n", + "
    grid_specNone
    grid_typeregular_ll
    shape(7, 12)
    area(90.0, 0.0, -90.0, 330.0)
    \n", + "
    \n", + " \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + "
    \n", + " " + ], + "text/plain": [ + "Field(2dfd, 2025-12-15 00:00:00, 2025-12-15 00:00:00, 0:00:00, 0, mean_sea, 0, regular_ll)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl[0]" + ] + }, + { + "cell_type": "markdown", + "id": "m1n2o3p4", + "metadata": {}, + "source": [ + "Conversion to Xarray is straightforward:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "n1o2p3q4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset> Size: 1MB\n",
    +       "Dimensions:                  (wave_direction: 36, wave_frequency: 29,\n",
    +       "                              forecast_reference_time: 2, latitude: 7,\n",
    +       "                              longitude: 12)\n",
    +       "Coordinates:\n",
    +       "  * wave_direction           (wave_direction) float64 288B 5.0 15.0 ... 355.0\n",
    +       "  * wave_frequency           (wave_frequency) float64 232B 0.03452 ... 0.4979\n",
    +       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 16B 202...\n",
    +       "  * latitude                 (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n",
    +       "  * longitude                (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n",
    +       "Data variables:\n",
    +       "    2dfd                     (wave_direction, wave_frequency, forecast_reference_time, latitude, longitude) float64 1MB ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  CF-1.8\n",
    +       "    institution:  ECMWF
    " + ], + "text/plain": [ + " Size: 1MB\n", + "Dimensions: (wave_direction: 36, wave_frequency: 29,\n", + " forecast_reference_time: 2, latitude: 7,\n", + " longitude: 12)\n", + "Coordinates:\n", + " * wave_direction (wave_direction) float64 288B 5.0 15.0 ... 355.0\n", + " * wave_frequency (wave_frequency) float64 232B 0.03452 ... 0.4979\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 16B 202...\n", + " * latitude (latitude) float64 56B 90.0 60.0 ... -60.0 -90.0\n", + " * longitude (longitude) float64 96B 0.0 30.0 ... 300.0 330.0\n", + "Data variables:\n", + " 2dfd (wave_direction, wave_frequency, forecast_reference_time, latitude, longitude) float64 1MB ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = fl.to_xarray()\n", + "ds" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 337ea5b7..60d37791 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1729,8 +1729,8 @@ def describe(self, component=all, filter=None, **kwargs): Returns ------- NamespaceDump - Dict-like object with one item s per component. In a Jupyter notebook represented - as a tabbed interface to browse the dump con stents. + Dict-like object with one item per component. In a Jupyter notebook represented + as a tabbed interface to browse the dump contents. 
See Also diff --git a/src/earthkit/data/field/component/component.py b/src/earthkit/data/field/component/component.py index c5e76691..9e6bfda8 100644 --- a/src/earthkit/data/field/component/component.py +++ b/src/earthkit/data/field/component/component.py @@ -282,12 +282,13 @@ def _normalise_create_kwargs(cls, data, allowed_keys=None, remove_nones=False): _kwargs = {} for k_in, v in data.items(): k = cls._ALIASES.get(k_in, k_in) - if k in allowed_keys: - if remove_nones and v is None: - continue - _kwargs[k] = v - else: - raise ValueError(f"Cannot use key={k} to create object={cls}") + if allowed_keys is not None: + if k in allowed_keys: + if remove_nones and v is None: + continue + _kwargs[k] = v + else: + raise ValueError(f"Cannot use key={k} to create object={cls}") return _kwargs @classmethod diff --git a/src/earthkit/data/field/component/parameter.py b/src/earthkit/data/field/component/parameter.py index bf7530ae..681f5a56 100644 --- a/src/earthkit/data/field/component/parameter.py +++ b/src/earthkit/data/field/component/parameter.py @@ -12,7 +12,7 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Union -from earthkit.utils.units import Units +from earthkit.utils.units import Units, convert_array from .component import SimpleFieldComponent, component_keys, mark_alias, mark_get_key @@ -26,10 +26,10 @@ class ParameterBase(SimpleFieldComponent): This class defines the interface for parameter components, which can represent different types of parameter information. Some of the methods may not be applicable to all parameter - types (e.g. :meth:`chem_variable`), and may return None. + types (e.g. :meth:`chem`), and may return None. The parameter information can be accessed by methods like :meth:`variable`, - :meth:`units`, and :meth:`chem_variable`. Each of these methods has an associated key + :meth:`units`, and :meth:`chem`. 
Each of these methods has an associated key that can be used in the :meth:`get` method to retrieve the corresponding information. The list of supported keys are as follows: @@ -38,11 +38,23 @@ class ParameterBase(SimpleFieldComponent): on the CF standard name - "long_name": string representing the long name of the parameter variable - "units": as a string or a :class:`Units` object representing the parameter units - - "chem_variable": string representing the parameter chemical variable + - "chem": string representing the parameter chemical constituent or aerosol type, or None + - "chem_long_name": string representing the long name of the parameter chemical constituent or aerosol type, or None + - "wavelength": int or float representing the optical parameter wavelength, or None + - "wavelength_bounds": 2-tuple of ints or floats representing the optical parameter wavelength bounds, or None + - "wavelength_units": :class:`Units` object representing the wavelength units (e.g. nanometers), or None + - "wave_direction": float representing the wave direction of the 2D spectra parameter, or None + - "wave_direction_index": int representing the 0-based index of the wave direction bin, or None + - "wave_direction_bounds": 2-tuple of floats representing the wave direction bounds, or None + - "wave_direction_units": :class:`Units` object representing the wave direction units (e.g. degrees), or None + - "wave_frequency": float representing the wave frequency of the 2D spectra parameter, or None + - "wave_frequency_index": int representing the 0-based index of the wave frequency bin, or None + - "wave_frequency_bounds": 2-tuple of floats representing the wave frequency bounds, or None + - "wave_frequency_units": :class:`Units` object representing the wave frequency units (e.g. 1/s), or None - "param": alias of "variable" Depending on the type of parameter information available, some of these keys may not be supported - and will return None in the subclasses. 
For example, the "chem_variable" key is only supported + and will return None in the subclasses. For example, the "chem" key is only supported for chemical parameters, and will return None for other parameter types. Typically, this object is used as a component of a field, and can be accessed via the :attr:`parameter` @@ -93,15 +105,21 @@ def variable(self) -> Optional[str]: def units(self) -> Optional["Units"]: r"""Return the parameter units. - The parameter units are :class:`Units` objects. The units are are based on Pint (when possible) + The parameter units are :class:`Units` objects. The units are based on Pint (when possible) and are normalised to a standard form. They can be used for unit conversions and comparisons. """ pass @mark_get_key @abstractmethod - def chem_variable(self) -> Optional[str]: - r"""Return the parameter chemical variable.""" + def chem(self) -> Optional[str]: + r"""Return the parameter chemical constituent or aerosol type.""" + pass + + @mark_get_key + @abstractmethod + def chem_long_name(self) -> Optional[str]: + r"""Return the long name of the parameter chemical constituent or aerosol type.""" pass @mark_alias("variable") @@ -127,10 +145,166 @@ def long_name(self) -> Optional[str]: """ pass + @mark_get_key + @abstractmethod + def wavelength(self, units=None) -> Optional[Union[int, float]]: + """Return the optical parameter wavelength. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the wavelength to the specified units and return as float. + If None, return the value in native units. + """ + pass + + @mark_get_key + @abstractmethod + def wavelength_bounds(self, units=None) -> Optional[Union[tuple[int, int], tuple[float, float]]]: + """Return the optical parameter wavelength bounds. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. 
+ """ + pass + + @mark_get_key + @abstractmethod + def wavelength_units(self) -> Optional["Units"]: + """Return the units of the optical parameter wavelength.""" + pass + + @mark_get_key + @abstractmethod + def wave_direction(self, units=None) -> Optional[float]: + """Return the wave direction of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the direction to the specified units and return as float. + If None, return the value in native units. + """ + pass + + @mark_get_key + @abstractmethod + def wave_direction_index(self) -> Optional[int]: + """Return the 0-based index of the wave direction bin.""" + pass + + @mark_get_key + @abstractmethod + def wave_direction_bounds(self, units=None) -> Optional[tuple[float, float]]: + """Return the wave direction bounds of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ + pass + + @mark_get_key + @abstractmethod + def wave_direction_units(self) -> Optional["Units"]: + """Return the units of the wave direction.""" + pass + + @mark_get_key + @abstractmethod + def wave_frequency(self, units=None) -> Optional[float]: + """Return the wave frequency of the 2D spectra parameter. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the frequency to the specified units and return as float. + If None, return the value in native units. + """ + pass + + @mark_get_key + @abstractmethod + def wave_frequency_index(self) -> Optional[int]: + """Return the 0-based index of the wave frequency bin.""" + pass + + @mark_get_key + @abstractmethod + def wave_frequency_bounds(self, units=None) -> Optional[tuple[float, float]]: + """Return the wave frequency bounds of the 2D spectra parameter. 
+ + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. + """ + pass + + @mark_get_key + @abstractmethod + def wave_frequency_units(self) -> Optional["Units"]: + """Return the units of the wave frequency.""" + pass + + @classmethod + def from_dict(cls, d: dict) -> "ParameterBase": + """Create a parameter object from a dictionary. + + The appropriate subclass is determined automatically based on the dictionary contents. + + Parameters + ---------- + d : dict + Dictionary containing parameter data. + + The dictionary can contain the following keys: + + - "variable": The parameter variable. + - "standard_name": The standard name of the parameter variable. + - "long_name": The long name of the parameter variable. + - "units": The parameter units, as a string or a Units object. + - "chem": The chemical constituent or aerosol type of the parameter. + - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. + - "wavelength": The optical parameter wavelength, as an int or a float. + - "wavelength_bounds": The optical parameter wavelength bounds, as a 2-tuple of ints or floats. + - "wavelength_units": The wavelength units, as a string or a Units object. + - "wave_direction": The wave direction of the 2D spectra parameter, as a float. + - "wave_direction_index": The 0-based index of the wave direction bin. + - "wave_direction_bounds": The wave direction bounds, as a 2-tuple of floats. + - "wave_direction_units": The wave direction units, as a string or a Units object. + - "wave_frequency": The wave frequency of the 2D spectra parameter, as a float. + - "wave_frequency_index": The 0-based index of the wave frequency bin. + - "wave_frequency_bounds": The wave frequency bounds, as a 2-tuple of floats. + - "wave_frequency_units": The wave frequency units, as a string or a Units object. 
+ + Returns + ------- + ParameterBase + The created parameter instance. The actual type depends on the dictionary contents: + :class:`ChemicalOpticalParameter`, :class:`ChemicalParameter`, + :class:`OpticalParameter`, :class:`WaveSpectraParameter`, or :class:`Parameter`. + """ + return create_parameter(d) -def create_parameter(d: dict) -> "ParameterBase": + +def create_parameter(d: dict) -> ParameterBase: """Create a ParameterBase object from a dictionary. + The appropriate subclass is determined automatically based on the dictionary contents: + + - If both ``chem`` and ``wavelength`` are present, a :class:`ChemicalOpticalParameter` is created. + - If only ``chem`` is present, a :class:`ChemicalParameter` is created. + - If only ``wavelength`` is present, an :class:`OpticalParameter` is created. + - If ``wave_direction`` or ``wave_frequency`` is present, a :class:`WaveSpectraParameter` is created. + - Otherwise, a :class:`Parameter` is created. + Parameters ---------- d : dict @@ -139,18 +313,56 @@ def create_parameter(d: dict) -> "ParameterBase": Returns ------- ParameterBase - The created ParameterBase instance. + The created parameter instance. The actual type depends on the dictionary contents. 
""" if not isinstance(d, dict): raise TypeError(f"Cannot create Parameter from {type(d)}, expected dict") - cls = Parameter - d1 = cls._normalise_create_kwargs( - d, allowed_keys=("variable", "units", "chem_variable", "standard_name", "long_name") + d1 = Parameter._normalise_create_kwargs( + d, + allowed_keys=( + "variable", + "standard_name", + "long_name", + "units", + "chem", + "chem_long_name", + "wavelength", + "wavelength_bounds", + "wavelength_units", + "wave_direction", + "wave_direction_index", + "wave_direction_bounds", + "wave_direction_units", + "wave_frequency", + "wave_frequency_index", + "wave_frequency_bounds", + "wave_frequency_units", + ), ) if "variable" not in d1: raise ValueError("Cannot create Parameter without variable") + has_chem = d1.get("chem") is not None or d1.get("chem_long_name") is not None + has_wavelength = d1.get("wavelength") is not None or d1.get("wavelength_bounds") is not None + has_wave_spectra = ( + d1.get("wave_direction") is not None + or d1.get("wave_direction_index") is not None + or d1.get("wave_frequency") is not None + or d1.get("wave_frequency_index") is not None + ) + + if has_chem and has_wavelength: + cls = ChemicalOpticalParameter + elif has_chem: + cls = ChemicalParameter + elif has_wavelength: + cls = OpticalParameter + elif has_wave_spectra: + cls = WaveSpectraParameter + else: + cls = Parameter + return cls(**d1) @@ -185,8 +397,92 @@ def units(self) -> None: """ return None - def chem_variable(self) -> None: - r"""Return the parameter chemical variable. + def chem(self) -> None: + r"""Return the parameter chemical constituent or aerosol type. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def chem_long_name(self) -> None: + r"""Return the long name of the parameter chemical constituent or aerosol type. + + An EmptyParameter does not contain any parameter information, and this method returns None. 
+ """ + return None + + def wavelength(self, units=None) -> None: + r"""Return the optical parameter wavelength. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wavelength_bounds(self, units=None) -> None: + r"""Return the optical parameter wavelength bounds. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wavelength_units(self) -> None: + r"""Return the units of the optical parameter wavelength. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_direction(self, units=None) -> None: + r"""Return the wave direction of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_direction_index(self) -> None: + r"""Return the 0-based index of the wave direction bin. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_direction_bounds(self, units=None) -> None: + r"""Return the wave direction bounds of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_direction_units(self) -> None: + r"""Return the units of the wave direction. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_frequency(self, units=None) -> None: + r"""Return the wave frequency of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_frequency_index(self) -> None: + r"""Return the 0-based index of the wave frequency bin. + + An EmptyParameter does not contain any parameter information, and this method returns None. 
+ """ + return None + + def wave_frequency_bounds(self, units=None) -> None: + r"""Return the wave frequency bounds of the 2D spectra parameter. + + An EmptyParameter does not contain any parameter information, and this method returns None. + """ + return None + + def wave_frequency_units(self) -> None: + r"""Return the units of the wave frequency. An EmptyParameter does not contain any parameter information, and this method returns None. """ @@ -218,34 +514,37 @@ def __setstate__(self, state): class Parameter(ParameterBase): - """Parameter component representing parameter information. + """Parameter component representing a regular parameter. + + A regular parameter is one that does not have any chemical, optical, or wave spectra + properties. For parameters with chemical constituents, use :class:`ChemicalParameter`. + For parameters with optical wavelength information, use :class:`OpticalParameter`. + For parameters with both chemical and optical properties, use :class:`ChemicalOpticalParameter`. + For parameters with wave spectra properties, use :class:`WaveSpectraParameter`. Parameters ---------- variable : str, optional The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. units : str or Units, optional The parameter units, by default None. Can be provided as a string or a Units object. - chem_variable : str, optional - The parameter chemical variable, by default None. 
""" - _chem_variable = None - def __init__( self, variable: str = None, standard_name: str = None, long_name: str = None, units: Union[str, "Units"] = None, - chem_variable: str = None, ) -> None: self._variable = variable self._standard_name = standard_name self._long_name = long_name self._units = Units.from_any(units) - if chem_variable is not None: - self._chem_variable = chem_variable def variable(self) -> Optional[str]: return self._variable @@ -259,60 +558,118 @@ def long_name(self) -> Optional[str]: def units(self) -> Optional["Units"]: return self._units - def chem_variable(self) -> Optional[str]: - return self._chem_variable + def chem(self) -> None: + r"""Return the parameter chemical constituent or aerosol type. - @classmethod - def from_dict(cls, d: dict) -> "Parameter": - """Create a Parameter object from a dictionary. + A regular Parameter does not have chemical information, and this method returns None. + """ + return None - Parameters - ---------- - d : dict - Dictionary containing parameter data. + def chem_long_name(self) -> None: + r"""Return the long name of the parameter chemical constituent or aerosol type. - The dictionary can contain the following keys: + A regular Parameter does not have chemical information, and this method returns None. + """ + return None - - "variable": The parameter variable. - - "units": The parameter units, as a string or a Units object. + def wavelength(self, units=None) -> None: + r"""Return the optical parameter wavelength. - Returns - ------- - Parameter - The created Parameter instance. + A regular Parameter does not have optical information, and this method returns None. """ - return create_parameter(d) + return None + + def wavelength_bounds(self, units=None) -> None: + r"""Return the optical parameter wavelength bounds. + + A regular Parameter does not have optical information, and this method returns None. 
+ """ + return None + + def wavelength_units(self) -> None: + r"""Return the units of the optical parameter wavelength. + + A regular Parameter does not have optical information, and this method returns None. + """ + return None + + def wave_direction(self, units=None) -> None: + r"""Return the wave direction of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_direction_index(self) -> None: + r"""Return the 0-based index of the wave direction bin. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_direction_bounds(self, units=None) -> None: + r"""Return the wave direction bounds of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_direction_units(self) -> None: + r"""Return the units of the wave direction. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency(self, units=None) -> None: + r"""Return the wave frequency of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency_index(self) -> None: + r"""Return the 0-based index of the wave frequency bin. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency_bounds(self, units=None) -> None: + r"""Return the wave frequency bounds of the 2D spectra parameter. + + A regular Parameter does not have wave spectra information, and this method returns None. + """ + return None + + def wave_frequency_units(self) -> None: + r"""Return the units of the wave frequency. + + A regular Parameter does not have wave spectra information, and this method returns None. 
+ """ + return None def to_dict(self): + """Return a dictionary representation of the parameter.""" return { "variable": self._variable, "standard_name": self._standard_name, "long_name": self._long_name, "units": str(self._units), - "chem_variable": self._chem_variable, } def __getstate__(self): - state = {} - state["variable"] = self._variable - state["standard_name"] = self._standard_name - state["long_name"] = self._long_name - state["units"] = str(self._units) - state["chem_variable"] = self._chem_variable - return state + return self.to_dict() def __setstate__(self, state): - self.__init__( - variable=state["variable"], - standard_name=state["standard_name"], - long_name=state["long_name"], - units=state["units"], - chem_variable=state["chem_variable"], - ) + self.__init__(**state) def set(self, *args, **kwargs): """Create a new instance with updated data. + The returned instance type is determined by the resulting dictionary contents, + which may differ from the current instance type. + Parameters ---------- args : tuple @@ -326,19 +683,392 @@ def set(self, *args, **kwargs): - "units": The parameter units, as a string or a Units object. - "standard_name": The standard name of the parameter variable. - "long_name": The long name of the parameter variable. - - "chem_variable": The chemical variable of the parameter. + - "chem": The chemical constituent or aerosol type of the parameter. + - "chem_long_name": The long name of the chemical constituent or aerosol type of the parameter. + - "wavelength": The optical parameter wavelength. + - "wavelength_bounds": The optical parameter wavelength bounds, as a 2-tuple. + - "wavelength_units": The wavelength units, as a string or a Units object. + - "wave_direction": The wave direction of the 2D spectra parameter. + - "wave_direction_index": The 0-based index of the wave direction bin. + - "wave_direction_bounds": The wave direction bounds, as a 2-tuple of floats. 
+ - "wave_direction_units": The wave direction units, as a string or a Units object. + - "wave_frequency": The wave frequency of the 2D spectra parameter. + - "wave_frequency_index": The 0-based index of the wave frequency bin. + - "wave_frequency_bounds": The wave frequency bounds, as a 2-tuple of floats. + - "wave_frequency_units": The wave frequency units, as a string or a Units object. """ d = self._normalise_set_kwargs( - *args, allowed_keys=("variable", "units", "chem_variable", "standard_name", "long_name"), **kwargs + *args, + allowed_keys=( + "variable", + "units", + "chem", + "chem_long_name", + "standard_name", + "long_name", + "wavelength", + "wavelength_bounds", + "wavelength_units", + "wave_direction", + "wave_direction_index", + "wave_direction_bounds", + "wave_direction_units", + "wave_frequency", + "wave_frequency_index", + "wave_frequency_bounds", + "wave_frequency_units", + ), + **kwargs, ) - current = { - "variable": self._variable, - "standard_name": self._standard_name, - "long_name": self._long_name, - "units": self._units, - "chem_variable": self._chem_variable, - } - + current = self.to_dict() current.update(d) - return self.from_dict(current) + return create_parameter(current) + + +class ChemicalParameter(Parameter): + """Parameter component representing a chemical parameter. + + A chemical parameter includes a chemical constituent or aerosol type identifier. + For parameters that also have optical wavelength information, use + :class:`ChemicalOpticalParameter`. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. 
+ chem : str, optional + The parameter chemical constituent or aerosol type, by default None. + chem_long_name : str, optional + The long name of the parameter chemical constituent or aerosol type, by default None. + """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + chem: str = None, + chem_long_name: str = None, + ) -> None: + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._chem = chem + self._chem_long_name = chem_long_name + + def chem(self) -> Optional[str]: + r"""Return the parameter chemical constituent or aerosol type.""" + return self._chem + + def chem_long_name(self) -> Optional[str]: + r"""Return the long name of the parameter chemical constituent or aerosol type.""" + return self._chem_long_name + + def to_dict(self): + """Return a dictionary representation of the chemical parameter.""" + d = super().to_dict() + d["chem"] = self._chem + d["chem_long_name"] = self._chem_long_name + return d + + +class OpticalParameter(Parameter): + """Parameter component representing an optical parameter. + + An optical parameter includes a wavelength but no chemical constituent. + For parameters that have both chemical and optical properties, use + :class:`ChemicalOpticalParameter`. + + Parameters + ---------- + variable : str, optional + The parameter variable, by default None. + standard_name : str, optional + The standard name of the parameter variable, by default None. + long_name : str, optional + The long name of the parameter variable, by default None. + units : str or Units, optional + The parameter units, by default None. Can be provided as a string or a Units object. + wavelength : int, optional + The optical parameter wavelength in native units (see ``wavelength_units``), by default None. 
+ wavelength_bounds : 2-tuple of ints, optional + The optical parameter wavelength bounds in native units, by default None. + wavelength_units : str or Units, optional + The wavelength units, by default None. Can be provided as a string or a Units object. + """ + + def __init__( + self, + variable: str = None, + standard_name: str = None, + long_name: str = None, + units: Union[str, "Units"] = None, + wavelength: Optional[int] = None, + wavelength_bounds: Optional[tuple[int, int]] = None, + wavelength_units: Union[str, "Units"] = None, + ) -> None: + super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units) + self._wavelength = wavelength + self._wavelength_bounds = wavelength_bounds + self._wavelength_units = Units.from_any(wavelength_units) + + def wavelength(self, units=None) -> Optional[Union[int, float]]: + r"""Return the optical parameter wavelength. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the wavelength to the specified units and return as float. + If None, return the value in native units. + """ + if self._wavelength is None: + return None + if units is None: + return self._wavelength + return convert_array(self._wavelength, units, self._wavelength_units) + + def wavelength_bounds(self, units=None) -> Optional[Union[tuple[int, int], tuple[float, float]]]: + r"""Return the optical parameter wavelength bounds. + + Parameters + ---------- + units : str or Units, optional + If provided, convert the bounds to the specified units and return as tuple of floats. + If None, return the value in native units. 
class ChemicalOpticalParameter(ChemicalParameter, OpticalParameter):
    """Parameter component representing a chemical-optical parameter.

    A chemical-optical parameter carries both a chemical constituent (or aerosol type)
    and an optical wavelength. Chemical behaviour is inherited from
    :class:`ChemicalParameter` and optical behaviour from :class:`OpticalParameter`.

    Parameters
    ----------
    variable : str, optional
        The parameter variable, by default None.
    standard_name : str, optional
        The standard name of the parameter variable, by default None.
    long_name : str, optional
        The long name of the parameter variable, by default None.
    units : str or Units, optional
        The parameter units, by default None. Can be provided as a string or a Units object.
    chem : str, optional
        The parameter chemical constituent or aerosol type, by default None.
    chem_long_name : str, optional
        The long name of the parameter chemical constituent or aerosol type, by default None.
    wavelength : int, optional
        The optical parameter wavelength in native units (see ``wavelength_units``), by default None.
    wavelength_bounds : 2-tuple of ints, optional
        The optical parameter wavelength bounds in native units, by default None.
    wavelength_units : str or Units, optional
        The wavelength units, by default None. Can be provided as a string or a Units object.
    """

    def __init__(
        self,
        variable: Optional[str] = None,
        standard_name: Optional[str] = None,
        long_name: Optional[str] = None,
        units: Optional[Union[str, "Units"]] = None,
        chem: Optional[str] = None,
        chem_long_name: Optional[str] = None,
        wavelength: Optional[int] = None,
        wavelength_bounds: Optional[tuple[int, int]] = None,
        wavelength_units: Optional[Union[str, "Units"]] = None,
    ) -> None:
        # Only the common arguments travel through the ``super().__init__`` chain; the
        # chemical and optical attributes are assigned afterwards, so the values given
        # here overwrite whatever the base initialisers left in place.
        super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units)
        self._chem = chem
        self._chem_long_name = chem_long_name
        self._wavelength = wavelength
        self._wavelength_bounds = wavelength_bounds
        self._wavelength_units = Units.from_any(wavelength_units)

    def to_dict(self):
        """Return a dictionary representation of the chemical-optical parameter.

        The dictionary returned by the parent ``to_dict()`` is extended with the ``"chem"``,
        ``"chem_long_name"``, ``"wavelength"``, ``"wavelength_bounds"`` and
        ``"wavelength_units"`` entries. The wavelength units are stored as ``str(units)``.
        """
        d = super().to_dict()
        d["chem"] = self._chem
        d["chem_long_name"] = self._chem_long_name
        d["wavelength"] = self._wavelength
        d["wavelength_bounds"] = self._wavelength_bounds
        d["wavelength_units"] = str(self._wavelength_units)
        return d


class WaveSpectraParameter(Parameter):
    """Parameter component representing a wave spectra parameter.

    A wave spectra parameter includes wave direction and/or wave frequency information
    from 2D wave spectra fields.

    Parameters
    ----------
    variable : str, optional
        The parameter variable, by default None.
    standard_name : str, optional
        The standard name of the parameter variable, by default None.
    long_name : str, optional
        The long name of the parameter variable, by default None.
    units : str or Units, optional
        The parameter units, by default None. Can be provided as a string or a Units object.
    wave_direction : float, optional
        The wave direction of the 2D spectra parameter in native units (see ``wave_direction_units``),
        by default None.
    wave_direction_index : int, optional
        The 0-based index of the wave direction bin, by default None.
    wave_direction_bounds : 2-tuple of floats, optional
        The wave direction bounds in native units, by default None.
    wave_direction_units : str or Units, optional
        The wave direction units, by default None. Can be provided as a string or a Units object.
    wave_frequency : float, optional
        The wave frequency of the 2D spectra parameter in native units (see ``wave_frequency_units``),
        by default None.
    wave_frequency_index : int, optional
        The 0-based index of the wave frequency bin, by default None.
    wave_frequency_bounds : 2-tuple of floats, optional
        The wave frequency bounds in native units, by default None.
    wave_frequency_units : str or Units, optional
        The wave frequency units, by default None. Can be provided as a string or a Units object.
    """

    def __init__(
        self,
        variable: Optional[str] = None,
        standard_name: Optional[str] = None,
        long_name: Optional[str] = None,
        units: Optional[Union[str, "Units"]] = None,
        wave_direction: Optional[float] = None,
        wave_direction_index: Optional[int] = None,
        wave_direction_bounds: Optional[tuple[float, float]] = None,
        wave_direction_units: Optional[Union[str, "Units"]] = None,
        wave_frequency: Optional[float] = None,
        wave_frequency_index: Optional[int] = None,
        wave_frequency_bounds: Optional[tuple[float, float]] = None,
        wave_frequency_units: Optional[Union[str, "Units"]] = None,
    ) -> None:
        super().__init__(variable=variable, standard_name=standard_name, long_name=long_name, units=units)
        self._wave_direction = wave_direction
        self._wave_direction_index = wave_direction_index
        self._wave_direction_bounds = wave_direction_bounds
        self._wave_direction_units = Units.from_any(wave_direction_units)
        self._wave_frequency = wave_frequency
        self._wave_frequency_index = wave_frequency_index
        self._wave_frequency_bounds = wave_frequency_bounds
        self._wave_frequency_units = Units.from_any(wave_frequency_units)

    @staticmethod
    def _wave_value_in_units(value, units, native_units):
        """Return a scalar in ``units``; ``None`` and ``units=None`` pass the stored value through."""
        if value is None or units is None:
            return value
        # Coerce to a built-in float so the result matches the documented return type and
        # the behaviour of the bounds accessors.
        # NOTE(review): arguments are passed as (values, requested units, native units),
        # as in the original code; confirm this matches convert_array's parameter order.
        return float(convert_array(value, units, native_units))

    @staticmethod
    def _wave_bounds_in_units(bounds, units, native_units):
        """Return a 2-tuple of bounds in ``units``; ``None`` and ``units=None`` pass through."""
        if bounds is None or units is None:
            return bounds
        lower, upper = convert_array(bounds, units, native_units)
        return float(lower), float(upper)

    def wave_direction(self, units=None) -> Optional[float]:
        r"""Return the wave direction of the 2D spectra parameter.

        Parameters
        ----------
        units : str or Units, optional
            If provided, convert the direction to the specified units and return as float.
            If None, return the value in native units.

        Returns
        -------
        float or None
            None when no wave direction is set.
        """
        return self._wave_value_in_units(self._wave_direction, units, self._wave_direction_units)

    def wave_direction_index(self) -> Optional[int]:
        r"""Return the 0-based index of the wave direction bin."""
        return self._wave_direction_index

    def wave_direction_bounds(self, units=None) -> Optional[tuple[float, float]]:
        r"""Return the wave direction bounds of the 2D spectra parameter.

        Parameters
        ----------
        units : str or Units, optional
            If provided, convert the bounds to the specified units and return as tuple of floats.
            If None, return the value in native units.

        Returns
        -------
        tuple of float or None
            None when no wave direction bounds are set.
        """
        return self._wave_bounds_in_units(self._wave_direction_bounds, units, self._wave_direction_units)

    def wave_direction_units(self) -> Optional["Units"]:
        r"""Return the units of the wave direction."""
        return self._wave_direction_units

    def wave_frequency(self, units=None) -> Optional[float]:
        r"""Return the wave frequency of the 2D spectra parameter.

        Parameters
        ----------
        units : str or Units, optional
            If provided, convert the frequency to the specified units and return as float.
            If None, return the value in native units.

        Returns
        -------
        float or None
            None when no wave frequency is set.
        """
        return self._wave_value_in_units(self._wave_frequency, units, self._wave_frequency_units)

    def wave_frequency_index(self) -> Optional[int]:
        r"""Return the 0-based index of the wave frequency bin."""
        return self._wave_frequency_index

    def wave_frequency_bounds(self, units=None) -> Optional[tuple[float, float]]:
        r"""Return the wave frequency bounds of the 2D spectra parameter.

        Parameters
        ----------
        units : str or Units, optional
            If provided, convert the bounds to the specified units and return as tuple of floats.
            If None, return the value in native units.

        Returns
        -------
        tuple of float or None
            None when no wave frequency bounds are set.
        """
        return self._wave_bounds_in_units(self._wave_frequency_bounds, units, self._wave_frequency_units)

    def wave_frequency_units(self) -> Optional["Units"]:
        r"""Return the units of the wave frequency."""
        return self._wave_frequency_units

    def to_dict(self):
        """Return a dictionary representation of the wave spectra parameter.

        The dictionary returned by the parent ``to_dict()`` is extended with the value, index,
        bounds and units entries for both wave direction and wave frequency. Units are stored
        as ``str(units)``.
        """
        d = super().to_dict()
        d["wave_direction"] = self._wave_direction
        d["wave_direction_index"] = self._wave_direction_index
        d["wave_direction_bounds"] = self._wave_direction_bounds
        d["wave_direction_units"] = str(self._wave_direction_units)
        d["wave_frequency"] = self._wave_frequency
        d["wave_frequency_index"] = self._wave_frequency_index
        d["wave_frequency_bounds"] = self._wave_frequency_bounds
        d["wave_frequency_units"] = str(self._wave_frequency_units)
        return d
def __print__(self) -> str: - return f"{self.level} {self.units} ({self.abbreviation})" + def __str__(self) -> str: + return f"{self.level()} {self.units()} ({self.abbreviation()})" def __repr__(self) -> str: return f"{self.__class__.__name__}(level={self.level()}, units={self.units()}, level_type={self._type.name})" diff --git a/src/earthkit/data/field/geotiff/create.py b/src/earthkit/data/field/geotiff/create.py index d52d2961..80984262 100644 --- a/src/earthkit/data/field/geotiff/create.py +++ b/src/earthkit/data/field/geotiff/create.py @@ -11,13 +11,13 @@ def create_geotiff_field(band, da): r"""Create a Field object from GeoTIFF Xarray dataarray.""" from earthkit.data.core.field import Field - from earthkit.data.field.component.parameter import Parameter + from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.geotiff.data import GeoTIFFData from earthkit.data.field.geotiff.geography import GeoTIFFGeography from earthkit.data.field.handler.labels import SimpleLabels data = GeoTIFFData(da) - parameter = Parameter.from_dict({"variable": da.name}) + parameter = create_parameter({"variable": da.name}) geography = GeoTIFFGeography(da) labels = SimpleLabels(band=band, **da.attrs) diff --git a/src/earthkit/data/field/geotiff/parameter.py b/src/earthkit/data/field/geotiff/parameter.py index a1b79152..fd1b078e 100644 --- a/src/earthkit/data/field/geotiff/parameter.py +++ b/src/earthkit/data/field/geotiff/parameter.py @@ -9,7 +9,7 @@ import logging -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponent LOG = logging.getLogger(__name__) @@ -33,5 +33,5 @@ def __init__(self, owner, selection=None) -> None: # self.owner = owner name = owner.name units = owner.variable.attrs.get("units", None) - spec = Parameter.from_dict(dict(variable=name, units=units)) + spec = 
create_parameter(dict(variable=name, units=units)) super().__init__(spec) diff --git a/src/earthkit/data/field/grib/parameter.py b/src/earthkit/data/field/grib/parameter.py index c9061d86..ec19aee1 100644 --- a/src/earthkit/data/field/grib/parameter.py +++ b/src/earthkit/data/field/grib/parameter.py @@ -7,18 +7,29 @@ # nor does it submit to any jurisdiction. # +from earthkit.data.field.component.parameter import create_parameter +from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler + from .collector import GribContextCollector from .core import GribFieldComponentHandler class GribParameterBuilder: + """Builder for creating parameter components from GRIB message handles. + + This builder extracts parameter metadata from GRIB messages and creates the appropriate + parameter component subclass (:class:`~earthkit.data.field.component.parameter.Parameter`, + :class:`~earthkit.data.field.component.parameter.ChemicalParameter`, + :class:`~earthkit.data.field.component.parameter.OpticalParameter`, + :class:`~earthkit.data.field.component.parameter.ChemicalOpticalParameter`, or + :class:`~earthkit.data.field.component.parameter.WaveSpectraParameter`) based on the + metadata contents. 
+ """ + @staticmethod def build(handle): - from earthkit.data.field.component.parameter import Parameter - from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler - d = GribParameterBuilder._build_dict(handle) - component = Parameter.from_dict(d) + component = create_parameter(d) handler = ParameterFieldComponentHandler.from_component(component) return handler @@ -27,6 +38,7 @@ def _build_dict(handle): def _get(key, default=None): return handle.get(key, default=default) + # Core metadata keys for identifying the parameter v = _get("shortName", None) if v == "~": v = handle.get("paramId", ktype=str, default=None) @@ -38,25 +50,168 @@ def _get(key, default=None): units = _get("units", None) - chem_name = _get("chemShortName", None) - - return dict( + d = dict( variable=variable, standard_name=standard_name, long_name=long_name, units=units, - chem_variable=chem_name, ) + # Metadata for chemical parameters + if _get("chemId", None) is not None: + # "chemId" is defined for chemical parameters + chem = _get("parameter.chemShortName", None) + # using "parameter.chemShortName" instead of "chemShortName" + # avoids getting "unknown" if this key is not defined + # cf. 
https://github.com/ecmwf/eccodes/blob/eac2eb507b5b44fcc3d3c58e38/definitions/grib2/parameters.def#L29 + + chem_long_name = _get("chemName", None) + if chem_long_name == "unknown": + chem_long_name = None + + d["chem"] = chem + d["chem_long_name"] = chem_long_name + + # TODO: some of the logic below should be moved to ecCodes + + # Metadata for optical parameters + _wavelength = _get("mars.wavelength", None) + wavelength = None + wavelength_bounds = None + # The logic below follows the "mars.wavelength" key definition: + # https://github.com/ecmwf/eccodes/blob/develop/definitions/mars/mars.wavelength.def + if isinstance(_wavelength, (int, float)): + wavelength = round(_wavelength) + elif isinstance(_wavelength, str): + # expected format is "wlen1-wlen2" (a range, e.g. "400-700") + try: + wlen1, wlen2 = _wavelength.split("-") + wavelength_bounds = round(float(wlen1)), round(float(wlen2)) + wavelength = round((wavelength_bounds[1] + wavelength_bounds[0]) / 2) + except Exception: + pass + + if wavelength is not None: + d["wavelength"] = wavelength + d["wavelength_bounds"] = wavelength_bounds + d["wavelength_units"] = "nm" + + _grib_edition = _get("edition", None) + + def _scale_value(v, scaling_factor): + if _grib_edition == 1: + return float(v / scaling_factor) + elif _grib_edition >= 2: + return float(v * 10 ** (-scaling_factor)) + raise ValueError(f"Unsupported GRIB edition: {_grib_edition}") + + # 2D wave spectra: direction + try: + direction_number = _get("directionNumber", None) + if direction_number is not None: + direction_index = direction_number - 1 # convert to 0-based index + number_of_directions = _get("numberOfDirections", None) + direction_scaling_factor = _get("directionScalingFactor", None) + scaled_directions = _get("scaledDirections", None) + wave_direction = _scale_value(scaled_directions[direction_index], direction_scaling_factor) + + d["wave_direction"] = wave_direction + d["wave_direction_index"] = direction_index + d["wave_direction_units"] = "degree" + + # wave direction bounds + 
if number_of_directions > 1: + if direction_index > 0: + prev_wave_direction = _scale_value( + scaled_directions[direction_index - 1], direction_scaling_factor + ) + delta = (wave_direction - prev_wave_direction) / 2 + else: + next_wave_direction = _scale_value( + scaled_directions[direction_index + 1], direction_scaling_factor + ) + delta = (next_wave_direction - wave_direction) / 2 + d["wave_direction_bounds"] = (wave_direction - delta, wave_direction + delta) + else: + d["wave_direction_bounds"] = None + except Exception: + pass + + # 2D wave spectra: frequency + try: + frequency_number = _get("frequencyNumber", None) + if frequency_number is not None: + frequency_index = frequency_number - 1 # convert to 0-based index + number_of_frequencies = _get("numberOfFrequencies", None) + frequency_scaling_factor = _get("frequencyScalingFactor", None) + scaled_frequencies = _get("scaledFrequencies", None) + wave_frequency = _scale_value(scaled_frequencies[frequency_index], frequency_scaling_factor) + + d["wave_frequency"] = wave_frequency + d["wave_frequency_index"] = frequency_index + d["wave_frequency_units"] = "s ** -1" + + # wave frequency bounds: frequencies are equally spaced on the log scale + if number_of_frequencies > 1: + if frequency_index > 0: + prev_wave_frequency = _scale_value( + scaled_frequencies[frequency_index - 1], frequency_scaling_factor + ) + factor = (wave_frequency / prev_wave_frequency) ** 0.5 + else: + next_wave_frequency = _scale_value( + scaled_frequencies[frequency_index + 1], frequency_scaling_factor + ) + factor = (next_wave_frequency / wave_frequency) ** 0.5 + d["wave_frequency_bounds"] = (round(wave_frequency / factor, 6), round(wave_frequency * factor, 6)) + else: + d["wave_frequency_bounds"] = None + + except Exception: + pass + + return d + class GribParameterContextCollector(GribContextCollector): + """Collector for extracting GRIB context keys from parameter components. 
+ + Collects the "shortName" key from the parameter component's variable for use + in GRIB encoding context. + """ + @staticmethod def collect_keys(handler, context): component = handler.component r = { "shortName": component.variable(), - # "units": param.units, } + + chem = component.chem() + if chem: + r["chemShortName"] = chem + + # TODO: some of the logic below should be moved to ecCodes + wavelength_bounds = component.wavelength_bounds(units="m") + if wavelength_bounds is not None: + r["firstWavelength"] = wavelength_bounds[0] + r["secondWavelength"] = wavelength_bounds[1] + # see: https://codes.ecmwf.int/grib/format/grib2/ctables/4/91/ + r["typeOfWavelengthInterval"] = 2 # Between first and second limit. + # The range includes the first limit but not the second limit + else: + wavelength = component.wavelength(units="m") + if wavelength is not None: + r["firstWavelength"] = wavelength + + wave_direction_index = component.wave_direction_index() + if wave_direction_index is not None: + r["directionNumber"] = wave_direction_index + 1 # convert to 1-based index + + wave_frequency_index = component.wave_frequency_index() + if wave_frequency_index is not None: + r["frequencyNumber"] = wave_frequency_index + 1 # convert to 1-based index + context.update(r) diff --git a/src/earthkit/data/field/mars/parameter.py b/src/earthkit/data/field/mars/parameter.py index 335cf929..81f4d18e 100644 --- a/src/earthkit/data/field/mars/parameter.py +++ b/src/earthkit/data/field/mars/parameter.py @@ -13,21 +13,29 @@ class MarsParameterBuilder: + """Builder for creating parameter components from MARS requests. + + This builder extracts parameter metadata from MARS request dictionaries and creates + the appropriate parameter component subclass using :func:`create_parameter`. 
+ """ + @staticmethod def build(request, build_empty=False): - from earthkit.data.field.component.parameter import Parameter + from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler d = MarsParameterBuilder._build_dict(request) if not d and not build_empty: return None - component = Parameter.from_dict(d) + component = create_parameter(d) handler = ParameterFieldComponentHandler.from_component(component) return handler @staticmethod def _build_dict(request): + # TODO: add chem and wavelength? + # TODO: chem would require an unaliasing table "grib-chemid.csv" param = request.get("param", None) if param is None: diff --git a/src/earthkit/data/field/xarray/parameter.py b/src/earthkit/data/field/xarray/parameter.py index 3d68e204..07b9f6a2 100644 --- a/src/earthkit/data/field/xarray/parameter.py +++ b/src/earthkit/data/field/xarray/parameter.py @@ -9,7 +9,7 @@ import logging -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import create_parameter from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler LOG = logging.getLogger(__name__) @@ -35,5 +35,7 @@ def __init__(self, owner, selection=None) -> None: standard_name = owner.variable.attrs.get("standard_name", "unknown") long_name = owner.variable.attrs.get("long_name", "unknown") units = owner.variable.attrs.get("units", None) - p = Parameter.from_dict(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) + # TODO: add "chem", "wavelength", "wave_direction", "wave_frequency" + # would need a similar mechanism to the one in the field/xarray/ensemble.py module + p = create_parameter(dict(variable=name, standard_name=standard_name, long_name=long_name, units=units)) super().__init__(p) diff --git a/src/earthkit/data/xr_engine/dim.py b/src/earthkit/data/xr_engine/dim.py index 7d8dfb3e..1365d550 100644 --- 
a/src/earthkit/data/xr_engine/dim.py +++ b/src/earthkit/data/xr_engine/dim.py @@ -48,6 +48,11 @@ def _get_metadata_keys(keys): _ENS_KEYS = ["member", "realisation", "realization"] ENS_KEYS = ["member"] + _get_component_keys("ensemble", _ENS_KEYS) + _get_metadata_keys(_GRIB_ENS_KEYS) +CHEM_KEYS = ["chem"] + _get_component_keys("parameter", ["chem"]) +WAVELENGTH_KEYS = ["wavelength"] + _get_component_keys("parameter", ["wavelength"]) +WAVE_DIRECTION_KEYS = ["wave_direction"] + _get_component_keys("parameter", ["wave_direction"]) +WAVE_FREQUENCY_KEYS = ["wave_frequency"] + _get_component_keys("parameter", ["wave_frequency"]) + _GRIB_LEVEL_KEYS = ["level", "levelist", "topLevel", "bottomLevel", "levels"] _VERTICAL_LEVEL_KEYS = ["level", "layer"] LEVEL_KEYS = ["level"] + _get_component_keys("vertical", _VERTICAL_LEVEL_KEYS) + _get_metadata_keys(_GRIB_LEVEL_KEYS) @@ -102,6 +107,10 @@ def _get_metadata_keys(keys): KEYS = ( ENS_KEYS, + CHEM_KEYS, + WAVELENGTH_KEYS, + WAVE_DIRECTION_KEYS, + WAVE_FREQUENCY_KEYS, LEVEL_KEYS, LEVEL_TYPE_KEYS, DATE_KEYS, @@ -295,6 +304,22 @@ class MemberDim(Dim): alias = get_keys(ENS_KEYS) +class ChemDim(Dim): + alias = get_keys(CHEM_KEYS) + + +class WavelengthDim(Dim): + alias = get_keys(WAVELENGTH_KEYS) + + +class WaveDirectionDim(Dim): + alias = get_keys(WAVE_DIRECTION_KEYS) + + +class WaveFrequencyDim(Dim): + alias = get_keys(WAVE_FREQUENCY_KEYS) + + class DateDim(Dim): name = "date" drop = get_keys(DATE_KEYS + DATETIME_KEYS, drop="date") @@ -449,7 +474,20 @@ class OtherDim(Dim): class DimRole: - NAMES = ("member", "date", "time", "step", "level", "level_type", "forecast_reference_time", "valid_time") + NAMES = ( + "member", + "date", + "time", + "step", + "level", + "level_type", + "forecast_reference_time", + "valid_time", + "chem", + "wavelength", + "wave_direction", + "wave_frequency", + ) def __init__(self, d, name_as_key=True): self.d = d @@ -564,21 +602,44 @@ class LevelPerTypeDimMode(LevelAndTypeDimMode): class DimBuilder: + name = 
None + dim_class = None used = {} ignored = {} + def __init__(self, profile, owner): + key, name = owner.dim_roles.role(self.name) + + dim = self.dim_class(owner, name=name, key=key) + self.used = {dim.name: dim} + def dims(self): return self.used, self.ignored class MemberDimBuilder(DimBuilder): name = "member" + dim_class = MemberDim - def __init__(self, profile, owner): - key, name = owner.dim_roles.role("member") - dim = MemberDim(owner, name=name, key=key) - self.used = {dim.name: dim} +class ChemDimBuilder(DimBuilder): + name = "chem" + dim_class = ChemDim + + +class WavelengthDimBuilder(DimBuilder): + name = "wavelength" + dim_class = WavelengthDim + + +class WaveDirectionDimBuilder(DimBuilder): + name = "wave_direction" + dim_class = WaveDirectionDim + + +class WaveFrequencyDimBuilder(DimBuilder): + name = "wave_frequency" + dim_class = WaveFrequencyDim class TimeDimBuilder(DimBuilder): @@ -685,7 +746,18 @@ def __init__(self, profile, owner): self.ignored.update(_ignored) -DIM_BUILDERS = {v.name: v for v in [MemberDimBuilder, TimeDimBuilder, LevelDimBuilder]} +DIM_BUILDERS = { + v.name: v + for v in [ + MemberDimBuilder, + ChemDimBuilder, + WavelengthDimBuilder, + WaveDirectionDimBuilder, + WaveFrequencyDimBuilder, + TimeDimBuilder, + LevelDimBuilder, + ] +} def ensure_dim_map(d): @@ -1074,6 +1146,10 @@ def rename_dataset_dims(self, dataset): PREDEFINED_DIMS = {} for i, d in enumerate([ MemberDim, + ChemDim, + WavelengthDim, + WaveDirectionDim, + WaveFrequencyDim, ForecastRefTimeDim, DateDim, TimeDim, diff --git a/src/earthkit/data/xr_engine/fieldlist.py b/src/earthkit/data/xr_engine/fieldlist.py index 940b2a0d..35675522 100644 --- a/src/earthkit/data/xr_engine/fieldlist.py +++ b/src/earthkit/data/xr_engine/fieldlist.py @@ -172,7 +172,12 @@ def unique_values(self, names): for k, v in vals.items(): v = [x for x in v if x is not None] - if all(isinstance(x, (int, datetime.timedelta)) for x in v): + if ( + all(isinstance(x, (int, float)) for x in v) + or 
all(isinstance(x, datetime.date) for x in v) + or all(isinstance(x, datetime.time) for x in v) + or all(isinstance(x, datetime.timedelta) for x in v) + ): vals[k] = sorted(v) else: vals[k] = sorted(v, key=str) diff --git a/src/earthkit/data/xr_engine/profiles/defaults.yaml b/src/earthkit/data/xr_engine/profiles/defaults.yaml index d53f860f..3b23f209 100644 --- a/src/earthkit/data/xr_engine/profiles/defaults.yaml +++ b/src/earthkit/data/xr_engine/profiles/defaults.yaml @@ -47,11 +47,29 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type + chem: parameter.chem + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency dim_name_from_role_name: true coord_attrs: member: standard_name: realization long_name: ensemble member id + chem: + long_name: atmospheric chemical or physical constituent type + wavelength: + standard_name: radiation_wavelength + units: nm + long_name: wavelength + wave_direction: # see: https://github.com/wavespectra/wavespectra/blob/main/wavespectra/core/attributes.yml + standard_name: sea_surface_wave_from_direction + units: degrees + long_name: wave direction + wave_frequency: + standard_name: sea_surface_wave_frequency + units: s-1 + long_name: wave frequency latitude: units: degrees_north standard_name: latitude diff --git a/src/earthkit/data/xr_engine/profiles/earthkit.yaml b/src/earthkit/data/xr_engine/profiles/earthkit.yaml index 4b8903ac..9324aecd 100644 --- a/src/earthkit/data/xr_engine/profiles/earthkit.yaml +++ b/src/earthkit/data/xr_engine/profiles/earthkit.yaml @@ -7,6 +7,10 @@ dim_roles: time: time.base_time level: vertical.level level_type: vertical.level_type + chem: parameter.chem + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: parameter.variable attrs_mode: fixed variable_attrs: diff --git a/src/earthkit/data/xr_engine/profiles/grib.yaml 
b/src/earthkit/data/xr_engine/profiles/grib.yaml index 2273b1e6..356e12a7 100644 --- a/src/earthkit/data/xr_engine/profiles/grib.yaml +++ b/src/earthkit/data/xr_engine/profiles/grib.yaml @@ -7,6 +7,10 @@ dim_roles: time: metadata.time level: metadata.level level_type: metadata.typeOfLevel + chem: parameter.chem + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: metadata.param attrs_mode: fixed variable_attrs: diff --git a/src/earthkit/data/xr_engine/profiles/mars.yaml b/src/earthkit/data/xr_engine/profiles/mars.yaml index fb004ac5..6362dc15 100644 --- a/src/earthkit/data/xr_engine/profiles/mars.yaml +++ b/src/earthkit/data/xr_engine/profiles/mars.yaml @@ -7,6 +7,10 @@ dim_roles: time: metadata.time level: metadata.levelist level_type: metadata.levtype + chem: parameter.chem + wavelength: parameter.wavelength + wave_direction: parameter.wave_direction + wave_frequency: parameter.wave_frequency variable_key: metadata.param attrs_mode: unique attrs: diff --git a/tests/data/chem-cams.grib b/tests/data/chem-cams.grib new file mode 100644 index 00000000..e0309edc Binary files /dev/null and b/tests/data/chem-cams.grib differ diff --git a/tests/data/optical-cams.grib b/tests/data/optical-cams.grib new file mode 100644 index 00000000..796ad975 Binary files /dev/null and b/tests/data/optical-cams.grib differ diff --git a/tests/data/wave_spectra.grib b/tests/data/wave_spectra.grib new file mode 100644 index 00000000..b59abd2e Binary files /dev/null and b/tests/data/wave_spectra.grib differ diff --git a/tests/field/test_parameter_component.py b/tests/field/test_parameter_component.py index bb741277..ca87221a 100644 --- a/tests/field/test_parameter_component.py +++ b/tests/field/test_parameter_component.py @@ -11,7 +11,14 @@ import pytest -from earthkit.data.field.component.parameter import Parameter +from earthkit.data.field.component.parameter import ( + ChemicalOpticalParameter, + 
ChemicalParameter, + OpticalParameter, + Parameter, + WaveSpectraParameter, + create_parameter, +) def test_parameter_component_alias_1(): @@ -21,6 +28,19 @@ def test_parameter_component_alias_1(): assert r.units() == "K" assert r.standard_name() is None assert r.long_name() is None + assert r.chem() is None + assert r.chem_long_name() is None + assert r.wavelength() is None + assert r.wavelength_bounds() is None + assert r.wavelength_units() is None + assert r.wave_direction() is None + assert r.wave_direction_index() is None + assert r.wave_direction_bounds() is None + assert r.wave_direction_units() is None + assert r.wave_frequency() is None + assert r.wave_frequency_index() is None + assert r.wave_frequency_bounds() is None + assert r.wave_frequency_units() is None @pytest.mark.parametrize( @@ -40,6 +60,39 @@ def test_parameter_component_alias_1(): "long_name": "Temperature", }, ), + ( + { + "variable": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wavelength_bounds": None, + "wavelength_units": "nm", + }, + { + "variable": "aod", + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wavelength_bounds": None, + "wavelength_units": "nm", + "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, + "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, + }, + ), ], ) def test_parameter_component_from_dict_ok(input_d, ref): @@ -49,13 +102,35 @@ def test_parameter_component_from_dict_ok(input_d, ref): if isinstance(input_d, list): for d in input_d: - r = Parameter.from_dict(d) + r = create_parameter(d) assert r.variable() == ref["variable"] assert r.param() == 
ref["param"] assert r.units() == ref["units"] assert r.standard_name() == ref["standard_name"] assert r.long_name() == ref["long_name"] + assert r.chem() == ref.get("chem", None) + assert r.chem_long_name() == ref.get("chem_long_name", None) + assert r.wavelength() == ref.get("wavelength", None) + assert r.wavelength_bounds() == ref.get("wavelength_bounds", None) + if ref.get("wavelength_units") is not None: + assert r.wavelength_units() == ref["wavelength_units"] + else: + assert r.wavelength_units() is None + assert r.wave_direction() == ref.get("wave_direction", None) + assert r.wave_direction_index() == ref.get("wave_direction_index", None) + assert r.wave_direction_bounds() == ref.get("wave_direction_bounds", None) + if ref.get("wave_direction_units") is not None: + assert r.wave_direction_units() == ref["wave_direction_units"] + else: + assert r.wave_direction_units() is None + assert r.wave_frequency() == ref.get("wave_frequency", None) + assert r.wave_frequency_index() == ref.get("wave_frequency_index", None) + assert r.wave_frequency_bounds() == ref.get("wave_frequency_bounds", None) + if ref.get("wave_frequency_units") is not None: + assert r.wave_frequency_units() == ref["wave_frequency_units"] + else: + assert r.wave_frequency_units() is None @pytest.mark.parametrize( @@ -76,6 +151,94 @@ def test_parameter_component_from_dict_ok(input_d, ref): "variable": "t", "param": "t", "units": "K", + "standard_name": None, + "long_name": None, + "chem": None, + "chem_long_name": None, + "wavelength": None, + "wavelength_bounds": None, + "wavelength_units": None, + "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, + "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, + }, + ), + ( + [ + { + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem": "aer_total", + 
"chem_long_name": "Total aerosol", + "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", + } + ], + { + "variable": "aod", + "param": "aod", + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "chem": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", + "wave_direction": None, + "wave_direction_index": None, + "wave_direction_bounds": None, + "wave_direction_units": None, + "wave_frequency": None, + "wave_frequency_index": None, + "wave_frequency_bounds": None, + "wave_frequency_units": None, + }, + ), + ( + [ + { + "variable": "2dfd", + "units": "meter ** 2 * second / radian", + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", + "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", + } + ], + { + "variable": "2dfd", + "param": "2dfd", + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "units": "meter ** 2 * second / radian", + "chem": None, + "chem_long_name": None, + "wavelength": None, + "wavelength_bounds": None, + "wavelength_units": None, + "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", + "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", }, ), ], @@ -92,8 +255,392 @@ def test_parameter_component_set(input_d, ref): for k, v in ref.items(): rv = getattr(r1, k)() - assert rv == v, f"key {k} expected {v} got {rv}" + if v is None: + assert rv is None, f"key {k} expected None got {rv}" + else: + assert rv == v, f"key {k} expected {v} got {rv}" # the original object is unchanged assert r.variable() == "p" 
assert r.units() == "Pa" + + +def test_parameter_component_wavelength(): + """Test wavelength bounds (wavelength range).""" + p = create_parameter(dict(variable="aod", wavelength_bounds=(400, 700))) + assert p.wavelength_bounds() == (400, 700) + + +def test_parameter_component_create_parameter_regular(): + """Test create_parameter returns a Parameter for regular parameters.""" + p = create_parameter({"variable": "t", "units": "K"}) + assert isinstance(p, Parameter) + assert p.variable() == "t" + assert p.units() == "K" + assert p.chem() is None + assert p.wavelength() is None + assert p.wavelength_bounds() is None + assert p.wavelength_units() is None + assert p.wave_direction() is None + assert p.wave_direction_index() is None + assert p.wave_direction_bounds() is None + assert p.wave_direction_units() is None + assert p.wave_frequency() is None + assert p.wave_frequency_index() is None + assert p.wave_frequency_bounds() is None + assert p.wave_frequency_units() is None + + +def test_parameter_component_create_parameter_chemical(): + """Test create_parameter returns a ChemicalParameter for chemical parameters.""" + p = create_parameter({"variable": "co", "units": "kg/kg", "chem": "carbon_monoxide", "chem_long_name": "CO"}) + assert isinstance(p, ChemicalParameter) + assert p.variable() == "co" + assert p.chem() == "carbon_monoxide" + assert p.chem_long_name() == "CO" + assert p.wavelength() is None + assert p.wavelength_bounds() is None + assert p.wavelength_units() is None + assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None + + +def test_parameter_component_create_parameter_optical(): + """Test create_parameter returns an OpticalParameter for optical parameters.""" + p = create_parameter({"variable": "aod", "units": "Numeric", "wavelength": 550, "wavelength_units": "nm"}) + assert isinstance(p, OpticalParameter) + assert p.variable() == "aod" + assert p.wavelength() == 550 + assert 
p.wavelength_bounds() is None + assert p.wavelength_units() == "nm" + assert p.chem() is None + assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None + + +def test_parameter_component_create_parameter_optical_with_bounds(): + """Test create_parameter returns an OpticalParameter with wavelength_bounds.""" + p = create_parameter({ + "variable": "aod", + "units": "Numeric", + "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", + }) + assert isinstance(p, OpticalParameter) + assert p.wavelength() == 550 + assert p.wavelength_bounds() == (400, 700) + assert p.wavelength_units() == "nm" + + +def test_parameter_component_create_parameter_chemical_optical(): + """Test create_parameter returns a ChemicalOpticalParameter for chemical-optical parameters.""" + p = create_parameter({ + "variable": "aod", + "units": "Numeric", + "chem": "aer_total", + "chem_long_name": "Total aerosol", + "wavelength": 550, + "wavelength_bounds": (400, 700), + "wavelength_units": "nm", + }) + assert isinstance(p, ChemicalOpticalParameter) + assert p.variable() == "aod" + assert p.chem() == "aer_total" + assert p.chem_long_name() == "Total aerosol" + assert p.wavelength() == 550 + assert p.wavelength_bounds() == (400, 700) + assert p.wavelength_units() == "nm" + assert p.wave_direction() is None + assert p.wave_direction_units() is None + assert p.wave_frequency_units() is None + + +def test_parameter_component_create_parameter_wave_spectra(): + """Test create_parameter returns a WaveSpectraParameter for wave spectra parameters.""" + p = create_parameter({ + "variable": "2dfd", + "units": "m**2 s / rad", + "wave_direction": 5.0, + "wave_direction_index": 0, + "wave_direction_bounds": (0.0, 7.5), + "wave_direction_units": "degrees", + "wave_frequency": 0.034523, + "wave_frequency_index": 1, + "wave_frequency_bounds": (0.03, 0.04), + "wave_frequency_units": "s-1", + }) + assert isinstance(p, 
WaveSpectraParameter) + assert p.variable() == "2dfd" + assert p.wave_direction() == 5.0 + assert p.wave_direction_index() == 0 + assert p.wave_direction_bounds() == (0.0, 7.5) + assert p.wave_direction_units() == "degrees" + assert p.wave_frequency() == 0.034523 + assert p.wave_frequency_index() == 1 + assert p.wave_frequency_bounds() == (0.03, 0.04) + assert p.wave_frequency_units() == "s-1" + assert p.chem() is None + assert p.wavelength() is None + assert p.wavelength_units() is None + + +def test_parameter_component_set_changes_type(): + """Test that set() can change the parameter type.""" + p = Parameter(variable="t", units="K") + assert isinstance(p, Parameter) + + # Add chem -> becomes ChemicalParameter + p2 = p.set(variable="co", chem="carbon_monoxide") + assert isinstance(p2, ChemicalParameter) + assert p2.chem() == "carbon_monoxide" + + # Add wavelength to chem -> becomes ChemicalOpticalParameter + p3 = p2.set(wavelength=550, wavelength_units="nm") + assert isinstance(p3, ChemicalOpticalParameter) + assert p3.chem() == "carbon_monoxide" + assert p3.wavelength() == 550 + assert p3.wavelength_units() == "nm" + + +def test_parameter_component_inheritance(): + """Test that subclasses have the correct inheritance relationships.""" + cp = ChemicalParameter(variable="co", chem="co") + op = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + cop = ChemicalOpticalParameter(variable="aod", chem="aer", wavelength=550, wavelength_units="nm") + wp = WaveSpectraParameter( + variable="2dfd", + wave_direction=5.0, + wave_direction_units="degrees", + wave_frequency_units="s-1", + ) + + # All are instances of Parameter + assert isinstance(cp, Parameter) + assert isinstance(op, Parameter) + assert isinstance(cop, Parameter) + assert isinstance(wp, Parameter) + + # ChemicalOpticalParameter is both Chemical and Optical + assert isinstance(cop, ChemicalParameter) + assert isinstance(cop, OpticalParameter) + + # But not cross-contaminated + assert not 
isinstance(cp, OpticalParameter) + assert not isinstance(op, ChemicalParameter) + assert not isinstance(wp, ChemicalParameter) + assert not isinstance(wp, OpticalParameter) + + +def test_parameter_component_optical_units(): + """Test that OpticalParameter stores wavelength_units as a Units object.""" + p = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + assert p.wavelength_units() == "nm" + assert p.wavelength_units() == "nanometer" + + # get() access + assert p.get("wavelength_units") == "nm" + + +def test_parameter_component_wave_spectra_units(): + """Test that WaveSpectraParameter stores wave direction and frequency units.""" + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=15.0, + wave_direction_index=1, + wave_direction_bounds=(7.5, 22.5), + wave_direction_units="degrees", + wave_frequency=0.05, + wave_frequency_index=2, + wave_frequency_bounds=(0.04, 0.06), + wave_frequency_units="s-1", + ) + assert p.wave_direction_units() == "degrees" + assert p.wave_frequency_units() == "s-1" + assert p.wave_frequency_units() == "1 / second" + + # get() access + assert p.get("wave_direction_units") == "degrees" + assert p.get("wave_frequency_units") == "s-1" + + +def test_parameter_component_to_dict_optical(): + """Test to_dict includes wavelength_units for OpticalParameter.""" + p = OpticalParameter( + variable="aod", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + d = p.to_dict() + assert d["wavelength"] == 550 + assert d["wavelength_bounds"] == (400, 700) + assert d["wavelength_units"] == "nanometer" + + +def test_parameter_component_to_dict_wave_spectra(): + """Test to_dict includes direction/frequency units for WaveSpectraParameter.""" + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=15.0, + wave_direction_index=1, + wave_direction_bounds=(7.5, 22.5), + wave_direction_units="degrees", + wave_frequency=0.05, + wave_frequency_index=2, + wave_frequency_bounds=(0.04, 0.06), + 
wave_frequency_units="s-1", + ) + d = p.to_dict() + assert d["wave_direction"] == 15.0 + assert d["wave_direction_index"] == 1 + assert d["wave_direction_bounds"] == (7.5, 22.5) + assert d["wave_direction_units"] == "degree" + assert d["wave_frequency"] == 0.05 + assert d["wave_frequency_index"] == 2 + assert d["wave_frequency_bounds"] == (0.04, 0.06) + assert d["wave_frequency_units"] == "1 / second" + + +def test_parameter_component_to_dict_chemical_optical(): + """Test to_dict includes wavelength_units for ChemicalOpticalParameter.""" + p = ChemicalOpticalParameter( + variable="aod", + chem="aer_total", + chem_long_name="Total aerosol", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + d = p.to_dict() + assert d["chem"] == "aer_total" + assert d["chem_long_name"] == "Total aerosol" + assert d["wavelength"] == 550 + assert d["wavelength_bounds"] == (400, 700) + assert d["wavelength_units"] == "nanometer" + + +def test_parameter_component_wavelength_unit_conversion(): + """Test wavelength conversion to different units.""" + import math + + p = OpticalParameter(variable="aod", wavelength=550, wavelength_bounds=(400, 700), wavelength_units="nm") + + # Native units: int + assert p.wavelength() == 550 + assert isinstance(p.wavelength(), int) + assert p.wavelength_bounds() == (400, 700) + assert isinstance(p.wavelength_bounds()[0], int) + + # Convert to micrometers + wl = p.wavelength(units="um") + assert isinstance(wl, float) + assert math.isclose(wl, 0.55, rel_tol=1e-9) + + wb = p.wavelength_bounds(units="um") + assert isinstance(wb[0], float) + assert isinstance(wb[1], float) + assert math.isclose(wb[0], 0.4, rel_tol=1e-9) + assert math.isclose(wb[1], 0.7, rel_tol=1e-9) + + # Convert to meters + wl_m = p.wavelength(units="m") + assert math.isclose(wl_m, 550e-9, rel_tol=1e-9) + + # None wavelength stays None + p2 = OpticalParameter(variable="aod", wavelength_units="nm") + assert p2.wavelength(units="um") is None + assert 
p2.wavelength_bounds(units="um") is None + + +def test_parameter_component_wave_direction_unit_conversion(): + """Test wave direction conversion to different units.""" + import math + + p = WaveSpectraParameter( + variable="2dfd", + wave_direction=180.0, + wave_direction_bounds=(170.0, 190.0), + wave_direction_units="degrees", + ) + + # Native units: float + assert p.wave_direction() == 180.0 + assert isinstance(p.wave_direction(), float) + assert p.wave_direction_bounds() == (170.0, 190.0) + + # Convert to radians + wd = p.wave_direction(units="radian") + assert isinstance(wd, float) + assert math.isclose(wd, math.pi, rel_tol=1e-6) + + wdb = p.wave_direction_bounds(units="radian") + assert isinstance(wdb[0], float) + assert math.isclose(wdb[0], math.radians(170.0), rel_tol=1e-6) + assert math.isclose(wdb[1], math.radians(190.0), rel_tol=1e-6) + + # None stays None + p2 = WaveSpectraParameter(variable="2dfd", wave_direction_units="degrees") + assert p2.wave_direction(units="radian") is None + assert p2.wave_direction_bounds(units="radian") is None + + +def test_parameter_component_wave_frequency_unit_conversion(): + """Test wave frequency conversion to different units.""" + import math + + p = WaveSpectraParameter( + variable="2dfd", + wave_frequency=0.05, + wave_frequency_bounds=(0.04, 0.06), + wave_frequency_units="s-1", + ) + + # Native units: float + assert p.wave_frequency() == 0.05 + assert isinstance(p.wave_frequency(), float) + assert p.wave_frequency_bounds() == (0.04, 0.06) + + # Convert to Hz (same dimensionality, should be identity) + wf = p.wave_frequency(units="Hz") + assert isinstance(wf, float) + assert math.isclose(wf, 0.05, rel_tol=1e-9) + + # Convert bounds + wfb = p.wave_frequency_bounds(units="Hz") + assert isinstance(wfb[0], float) + assert math.isclose(wfb[0], 0.04, rel_tol=1e-9) + assert math.isclose(wfb[1], 0.06, rel_tol=1e-9) + + # None stays None + p2 = WaveSpectraParameter(variable="2dfd", wave_frequency_units="s-1") + assert 
p2.wave_frequency(units="Hz") is None + assert p2.wave_frequency_bounds(units="Hz") is None + + +def test_parameter_component_chemical_optical_wavelength_conversion(): + """Test wavelength conversion works through ChemicalOpticalParameter.""" + import math + + p = ChemicalOpticalParameter( + variable="aod", + chem="aer_total", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + + # Native: int + assert p.wavelength() == 550 + assert isinstance(p.wavelength(), int) + + # Converted: float + wl = p.wavelength(units="um") + assert isinstance(wl, float) + assert math.isclose(wl, 0.55, rel_tol=1e-9) + + wb = p.wavelength_bounds(units="um") + assert math.isclose(wb[0], 0.4, rel_tol=1e-9) + assert math.isclose(wb[1], 0.7, rel_tol=1e-9) diff --git a/tests/grib/test_grib_parameter.py b/tests/grib/test_grib_parameter.py index 55b50eff..f752ff0c 100644 --- a/tests/grib/test_grib_parameter.py +++ b/tests/grib/test_grib_parameter.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. 
# +import math import pytest from grib_fixtures import ( @@ -63,5 +64,186 @@ def test_grib_parameter_chem(fl_type): assert f.parameter.variable() == "tcvimd" assert f.parameter.param() == "tcvimd" - assert f.parameter.chem_variable() == "CO" + assert f.parameter.chem() == "CO" assert f.parameter.units() == "kg m**-2" + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_chem_long_name(fl_type): + """Test chem_long_name extraction from CAMS chemistry GRIB2 data.""" + ds, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + + expected = [ + ("mass_mixrat", "CO", "Carbon monoxide"), + ("mass_mixrat", "HCHO", "Formaldehyde"), + ("mass_mixrat", "O3", "Ozone"), + ] + + assert len(ds) == 3 + for i, (var, chem, chem_long) in enumerate(expected): + f = ds[i] + assert f.parameter.variable() == var + assert f.parameter.param() == var + assert f.parameter.units() == "dimensionless" + assert f.parameter.chem() == chem + assert f.parameter.chem_long_name() == chem_long + assert f.parameter.wavelength() is None + assert f.parameter.wavelength_bounds() is None + assert f.parameter.wavelength_units() is None + assert f.parameter.wave_direction() is None + assert f.parameter.wave_direction_index() is None + assert f.parameter.wave_direction_bounds() is None + assert f.parameter.wave_direction_units() is None + assert f.parameter.wave_frequency() is None + assert f.parameter.wave_frequency_index() is None + assert f.parameter.wave_frequency_bounds() is None + assert f.parameter.wave_frequency_units() is None + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wavelength(fl_type): + """Test wavelength extraction from CAMS optical GRIB2 data.""" + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + + assert len(ds) == 4 + # All fields have aod variable with wavelength 550 or 800 + for f in ds: + assert f.parameter.wavelength() in (550, 800) + assert isinstance(f.parameter.wavelength(), int) + assert 
f.parameter.wavelength_units() == "nm" + + result = ds.unique("parameter.wavelength") + assert set(result["parameter.wavelength"]) == {550, 800} + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wavelength_conversion(fl_type): + """Test wavelength unit conversion from CAMS optical GRIB2 data.""" + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + + for f in ds: + wl_nm = f.parameter.wavelength() + wl_um = f.parameter.wavelength(units="um") + assert isinstance(wl_um, float) + assert math.isclose(wl_um, wl_nm / 1000.0, rel_tol=1e-9) + + # wavelength_bounds may be None (single wavelength) + wb = f.parameter.wavelength_bounds() + if wb is not None: + wb_um = f.parameter.wavelength_bounds(units="um") + assert isinstance(wb_um[0], float) + assert math.isclose(wb_um[0], wb[0] / 1000.0, rel_tol=1e-9) + assert math.isclose(wb_um[1], wb[1] / 1000.0, rel_tol=1e-9) + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction(fl_type): + """Test wave_direction extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + assert len(ds) == 18 + # All fields should have non-None wave_direction + for f in ds: + assert f.parameter.wave_direction() is not None + assert isinstance(f.parameter.wave_direction(), float) + assert f.parameter.wave_direction_units() == "degrees" + assert f.parameter.wave_direction_index() is not None + assert isinstance(f.parameter.wave_direction_index(), int) + assert f.parameter.wave_direction_index() >= 0 + + result = ds.unique("parameter.wave_direction") + assert set(result["parameter.wave_direction"]) == {55.0, 115.0, 175.0, 235.0, 295.0, 355.0} + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction_bounds(fl_type): + """Test wave_direction_bounds extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + for f in ds: + bounds 
= f.parameter.wave_direction_bounds() + if bounds is not None: + assert isinstance(bounds, tuple) + assert len(bounds) == 2 + assert isinstance(bounds[0], float) + assert isinstance(bounds[1], float) + # bounds should bracket the direction value + direction = f.parameter.wave_direction() + assert bounds[0] <= direction <= bounds[1] + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_direction_conversion(fl_type): + """Test wave direction unit conversion from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + f = ds[0] + wd_deg = f.parameter.wave_direction() + wd_rad = f.parameter.wave_direction(units="radian") + assert isinstance(wd_rad, float) + assert math.isclose(wd_rad, math.radians(wd_deg), rel_tol=1e-6) + + bounds = f.parameter.wave_direction_bounds() + if bounds is not None: + bounds_rad = f.parameter.wave_direction_bounds(units="radian") + assert isinstance(bounds_rad[0], float) + assert math.isclose(bounds_rad[0], math.radians(bounds[0]), rel_tol=1e-6) + assert math.isclose(bounds_rad[1], math.radians(bounds[1]), rel_tol=1e-6) + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_frequency(fl_type): + """Test wave_frequency extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + result = ds.unique("parameter.wave_frequency") + freqs = result["parameter.wave_frequency"] + assert len(freqs) == 3 + # Check approximate values + assert abs(freqs[0] - 0.034523) < 0.001 + assert abs(freqs[1] - 0.1311) < 0.001 + assert abs(freqs[2] - 0.497852) < 0.001 + + # Check units and index + for f in ds: + assert f.parameter.wave_frequency_units() == "s-1" + assert f.parameter.wave_frequency_index() is not None + assert isinstance(f.parameter.wave_frequency_index(), int) + assert f.parameter.wave_frequency_index() >= 0 + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def 
test_grib_parameter_wave_frequency_bounds(fl_type): + """Test wave_frequency_bounds extraction from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + for f in ds: + bounds = f.parameter.wave_frequency_bounds() + if bounds is not None: + assert isinstance(bounds, tuple) + assert len(bounds) == 2 + assert isinstance(bounds[0], float) + assert isinstance(bounds[1], float) + # bounds should bracket the frequency value + freq = f.parameter.wave_frequency() + assert bounds[0] <= freq <= bounds[1] + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_parameter_wave_frequency_conversion(fl_type): + """Test wave frequency unit conversion from 2D wave spectra GRIB data.""" + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + + f = ds[0] + wf = f.parameter.wave_frequency() + # s-1 and Hz are the same unit, so conversion should be identity + wf_hz = f.parameter.wave_frequency(units="Hz") + assert isinstance(wf_hz, float) + assert math.isclose(wf_hz, wf, rel_tol=1e-9) + + bounds = f.parameter.wave_frequency_bounds() + if bounds is not None: + bounds_hz = f.parameter.wave_frequency_bounds(units="Hz") + assert isinstance(bounds_hz[0], float) + assert math.isclose(bounds_hz[0], bounds[0], rel_tol=1e-9) + assert math.isclose(bounds_hz[1], bounds[1], rel_tol=1e-9) diff --git a/tests/grib/test_grib_parameter_context_collector.py b/tests/grib/test_grib_parameter_context_collector.py new file mode 100644 index 00000000..7cebb66b --- /dev/null +++ b/tests/grib/test_grib_parameter_context_collector.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +"""Tests for GribParameterContextCollector.collect_keys covering +chem, wavelength, wave_direction_index, and wave_frequency_index context keys. +""" + +import pytest +from grib_fixtures import FL_FILE, load_grib_data # noqa: E402 + +from earthkit.data.field.component.parameter import ( + ChemicalOpticalParameter, + ChemicalParameter, + OpticalParameter, + Parameter, + WaveSpectraParameter, +) +from earthkit.data.field.grib.parameter import GribParameterContextCollector +from earthkit.data.field.handler.parameter import ParameterFieldComponentHandler + + +def _make_handler(component): + """Helper to wrap a parameter component in a handler.""" + return ParameterFieldComponentHandler(component) + + +def _collect(handler): + """Run collect_keys and return the resulting context dict.""" + context = {} + GribParameterContextCollector.collect_keys(handler, context) + return context + + +# -------------------------------------------------------------------------- +# Unit tests using constructed components (no file I/O) +# -------------------------------------------------------------------------- + + +class TestCollectKeysBasicParameter: + """Test collect_keys with a basic Parameter (no chem, wavelength, or wave).""" + + def test_shortname_only(self): + comp = Parameter(variable="t") + ctx = _collect(_make_handler(comp)) + assert ctx == {"shortName": "t"} + + def test_no_chem_key(self): + comp = Parameter(variable="msl", units="Pa") + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + def test_no_wavelength_keys(self): + comp = Parameter(variable="msl") + ctx = _collect(_make_handler(comp)) + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + + def test_no_direction_frequency_keys(self): + comp = 
Parameter(variable="msl") + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +class TestCollectKeysChemParameter: + """Test collect_keys with ChemicalParameter.""" + + def test_chem_short_name_set(self): + comp = ChemicalParameter(variable="tcvimd", chem="CO") + ctx = _collect(_make_handler(comp)) + assert ctx["shortName"] == "tcvimd" + assert ctx["chemShortName"] == "CO" + + def test_chem_with_long_name(self): + comp = ChemicalParameter(variable="mass_mixrat", chem="O3", chem_long_name="Ozone") + ctx = _collect(_make_handler(comp)) + assert ctx["chemShortName"] == "O3" + # chem_long_name is not collected into context + assert "chemLongName" not in ctx + + def test_chem_none_not_set(self): + """When chem is None, chemShortName should not appear in context.""" + comp = ChemicalParameter(variable="foo", chem=None) + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + def test_chem_empty_string_not_set(self): + """When chem is empty string (falsy), chemShortName should not appear.""" + comp = ChemicalParameter(variable="foo", chem="") + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + + +class TestCollectKeysOpticalParameter: + """Test collect_keys with OpticalParameter.""" + + def test_wavelength_no_bounds(self): + """Single wavelength (no bounds) → only firstWavelength set, converted to metres.""" + # wavelength=550 nm → 550e-9 m + comp = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + ctx = _collect(_make_handler(comp)) + assert "firstWavelength" in ctx + assert "secondWavelength" not in ctx + # 550 nm = 5.5e-7 m + assert abs(ctx["firstWavelength"] - 5.5e-7) < 1e-12 + + def test_wavelength_with_bounds(self): + """When wavelength_bounds are present, firstWavelength and secondWavelength set.""" + comp = OpticalParameter( + variable="aod", + wavelength=625, + wavelength_bounds=(500, 750), + wavelength_units="nm", + ) + ctx = 
_collect(_make_handler(comp)) + assert "firstWavelength" in ctx + assert "secondWavelength" in ctx + # 500 nm = 5e-7 m + assert abs(ctx["firstWavelength"] - 5e-7) < 1e-12 + # 750 nm = 7.5e-7 m + assert abs(ctx["secondWavelength"] - 7.5e-7) < 1e-12 + + def test_no_direction_frequency_for_optical(self): + """Optical parameters should not set direction/frequency keys.""" + comp = OpticalParameter(variable="aod", wavelength=550, wavelength_units="nm") + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +class TestCollectKeysChemOpticalParameter: + """Test collect_keys with ChemicalOpticalParameter.""" + + def test_both_chem_and_wavelength(self): + comp = ChemicalOpticalParameter( + variable="aod", + chem="SO4", + wavelength=550, + wavelength_bounds=(400, 700), + wavelength_units="nm", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["shortName"] == "aod" + assert ctx["chemShortName"] == "SO4" + assert "firstWavelength" in ctx + assert "secondWavelength" in ctx + assert abs(ctx["firstWavelength"] - 4e-7) < 1e-12 + assert abs(ctx["secondWavelength"] - 7e-7) < 1e-12 + + def test_chem_optical_no_bounds(self): + comp = ChemicalOpticalParameter( + variable="aod", + chem="dust", + wavelength=800, + wavelength_units="nm", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["chemShortName"] == "dust" + assert "firstWavelength" in ctx + assert "secondWavelength" not in ctx + assert abs(ctx["firstWavelength"] - 8e-7) < 1e-12 + + +class TestCollectKeysWaveSpectraParameter: + """Test collect_keys with WaveSpectraParameter.""" + + def test_direction_index_converted_to_1_based(self): + """wave_direction_index is 0-based, directionNumber should be 1-based.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=55.0, + wave_direction_index=0, + wave_direction_units="degree", + wave_frequency=0.035, + wave_frequency_index=0, + wave_frequency_units="s ** -1", + ) + ctx = 
_collect(_make_handler(comp)) + assert ctx["directionNumber"] == 1 + + def test_frequency_index_converted_to_1_based(self): + """wave_frequency_index is 0-based, frequencyNumber should be 1-based.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=115.0, + wave_direction_index=2, + wave_direction_units="degree", + wave_frequency=0.131, + wave_frequency_index=5, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["directionNumber"] == 3 + assert ctx["frequencyNumber"] == 6 + + def test_direction_only(self): + """If only direction index is set, only directionNumber appears.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=175.0, + wave_direction_index=4, + wave_direction_units="degree", + ) + ctx = _collect(_make_handler(comp)) + assert ctx["directionNumber"] == 5 + assert "frequencyNumber" not in ctx + + def test_frequency_only(self): + """If only frequency index is set, only frequencyNumber appears.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_frequency=0.5, + wave_frequency_index=9, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert "directionNumber" not in ctx + assert ctx["frequencyNumber"] == 10 + + def test_no_chem_or_wavelength_for_wave(self): + """Wave spectra params should not set chem/wavelength keys.""" + comp = WaveSpectraParameter( + variable="2dfd", + wave_direction=55.0, + wave_direction_index=0, + wave_direction_units="degree", + wave_frequency=0.035, + wave_frequency_index=0, + wave_frequency_units="s ** -1", + ) + ctx = _collect(_make_handler(comp)) + assert "chemShortName" not in ctx + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + + +# -------------------------------------------------------------------------- +# Integration tests using real GRIB data files +# -------------------------------------------------------------------------- + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class 
TestCollectKeysFromChemGrib: + """Integration: context collection from chem GRIB files.""" + + def test_chem_context(self, fl_type): + ds, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + expected_chems = ["CO", "HCHO", "O3"] + for i, chem in enumerate(expected_chems): + f = ds[i] + handler = f._components["parameter"] + ctx = _collect(handler) + assert ctx["shortName"] == f.parameter.variable() + assert ctx["chemShortName"] == chem + assert "firstWavelength" not in ctx + assert "secondWavelength" not in ctx + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class TestCollectKeysFromOpticalGrib: + """Integration: context collection from optical GRIB files.""" + + def test_optical_context_wavelength(self, fl_type): + ds, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + assert "firstWavelength" in ctx + # wavelength in metres + wl_m = f.parameter.wavelength(units="m") + assert abs(ctx["firstWavelength"] - wl_m) < 1e-15 + assert "directionNumber" not in ctx + assert "frequencyNumber" not in ctx + + +@pytest.mark.parametrize("fl_type", FL_FILE) +class TestCollectKeysFromWaveSpectraGrib: + """Integration: context collection from wave spectra GRIB files.""" + + def test_wave_direction_number(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + # directionNumber should be 1-based + expected = f.parameter.wave_direction_index() + 1 + assert ctx["directionNumber"] == expected + + def test_wave_frequency_number(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + for f in ds: + handler = f._components["parameter"] + ctx = _collect(handler) + # frequencyNumber should be 1-based + expected = f.parameter.wave_frequency_index() + 1 + assert 
ctx["frequencyNumber"] == expected + + def test_wave_no_chem_or_wavelength(self, fl_type): + ds, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds[0] + handler = f._components["parameter"] + ctx = _collect(handler) + assert "chemShortName" not in ctx + assert "secondWavelength" not in ctx diff --git a/tests/grib/test_grib_set_parameter.py b/tests/grib/test_grib_set_parameter.py index 3ef96daa..1d7f562b 100644 --- a/tests/grib/test_grib_set_parameter.py +++ b/tests/grib/test_grib_set_parameter.py @@ -83,3 +83,156 @@ def test_grib_set_parameter_2( assert f.get("metadata.shortName") is None assert f.get("parameter.units") == "kg/kg" assert f.get("metadata.units") is None + + +# -------------------------------------------------------------------------- +# Round-trip tests for GribParameterContextCollector keys +# -------------------------------------------------------------------------- + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_chem_roundtrip(fl_type): + """Set parameter.chem, write GRIB, read back and verify chemShortName.""" + ds_ori, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Change chem from CO to SO2 + f2 = f.set({"parameter.chem": "SO2"}) + assert f2.get("parameter.chem") == "SO2" + assert f2.get("parameter.variable") == "mass_mixrat" + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.variable") == "mass_mixrat" + assert f_saved[0].get("parameter.chem") == "SO2" + assert f_saved[0].get("metadata.shortName") == "mass_mixrat" + assert f_saved[0].get("metadata.chemShortName") == "SO2" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_chem_variable_roundtrip(fl_type): + """Set both parameter.variable and parameter.chem, write GRIB, read back.""" + ds_ori, _ = load_grib_data("chem-cams.grib", fl_type, folder="data") + f = 
ds_ori[0] + + f2 = f.set({"parameter.variable": "mass_mixrat", "parameter.chem": "HCHO"}) + assert f2.get("parameter.chem") == "HCHO" + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.variable") == "mass_mixrat" + assert f_saved[0].get("parameter.chem") == "HCHO" + assert f_saved[0].get("metadata.chemShortName") == "HCHO" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wavelength_roundtrip(fl_type): + """Set parameter.wavelength (single value), write GRIB, read back.""" + ds_ori, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Change wavelength from 550 to 800 nm + f2 = f.set({"parameter.wavelength": 800, "parameter.wavelength_units": "nm"}) + assert f2.get("parameter.wavelength") == 800 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.wavelength") == 800 + assert f_saved[0].get("parameter.variable") == "aod" + assert f_saved[0].get("metadata.shortName") == "aod" + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wavelength_bounds_roundtrip(fl_type): + """Set parameter.wavelength_bounds, write GRIB, verify raw GRIB keys.""" + ds_ori, _ = load_grib_data("optical-cams.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set wavelength with bounds + f2 = f.set({ + "parameter.wavelength": 625, + "parameter.wavelength_bounds": (500, 750), + "parameter.wavelength_units": "nm", + }) + assert f2.get("parameter.wavelength") == 625 + assert f2.get("parameter.wavelength_bounds") == (500, 750) + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + assert f_saved[0].get("parameter.wavelength") == 625 + assert f_saved[0].get("parameter.wavelength_bounds") == 
(500, 750) + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_direction_roundtrip(fl_type): + """Set parameter.wave_direction_index, write GRIB, read back as 1-based directionNumber.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set a new direction index (0-based) + f2 = f.set({ + "parameter.wave_direction_index": 3, + }) + assert f2.get("parameter.wave_direction_index") == 3 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # 0-based index 3 should be stored as 1-based directionNumber=4 + assert f_saved[0].get("parameter.wave_direction_index") == 3 + assert f_saved[0].get("metadata.directionNumber") == 4 + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_frequency_roundtrip(fl_type): + """Set parameter.wave_frequency_index, write GRIB, read back as 1-based frequencyNumber.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + # Set a new frequency index (0-based) + f2 = f.set({ + "parameter.wave_frequency_index": 2, + }) + assert f2.get("parameter.wave_frequency_index") == 2 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # 0-based index 2 should be stored as 1-based frequencyNumber=3 + assert f_saved[0].get("parameter.wave_frequency_index") == 2 + assert f_saved[0].get("metadata.frequencyNumber") == 3 + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_parameter_wave_both_indices_roundtrip(fl_type): + """Set both wave_direction_index and wave_frequency_index, write, read back.""" + ds_ori, _ = load_grib_data("wave_spectra.grib", fl_type, folder="data") + f = ds_ori[0] + + f2 = f.set({ + "parameter.wave_direction_index": 4, + "parameter.wave_frequency_index": 7, + }) + assert 
f2.get("parameter.wave_direction_index") == 4 + assert f2.get("parameter.wave_frequency_index") == 7 + + with temp_file() as tmp: + f2.to_target("file", tmp) + f_saved = from_source("file", tmp).to_fieldlist() + assert len(f_saved) == 1 + # direction: 0-based 4 → 1-based 5 + assert f_saved[0].get("parameter.wave_direction_index") == 4 + assert f_saved[0].get("metadata.directionNumber") == 5 + # frequency: 0-based 7 → 1-based 8 + assert f_saved[0].get("parameter.wave_frequency_index") == 7 + assert f_saved[0].get("metadata.frequencyNumber") == 8 diff --git a/tests/xr_engine/test_xr_engine_dims.py b/tests/xr_engine/test_xr_engine_dims.py index 0c9328b5..66349c66 100644 --- a/tests/xr_engine/test_xr_engine_dims.py +++ b/tests/xr_engine/test_xr_engine_dims.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import datetime # (C) Copyright 2020 ECMWF. # @@ -8,8 +9,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. # - - import pandas as pd import pytest from xr_engine_fixtures import ( @@ -22,6 +21,86 @@ from earthkit.data.utils.testing import earthkit_remote_test_data_file +@pytest.fixture(scope="module") +def pl_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")) + + +@pytest.fixture(scope="module") +def pl_small_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_small.grib")) + + +@pytest.fixture(scope="module") +def pl_sfc_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl_sfc.grib1")) + + +@pytest.fixture(scope="module") +def quantiles_pd_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/ens/quantiles_pd.grib")) + + +@pytest.fixture(scope="module") +def wave_spectra_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/wave_spectra.grib")) + + +@pytest.fixture(scope="module") +def aifs_sfc_grib_source(): 
+ return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-sfc.grib")) + + +@pytest.fixture(scope="module") +def aifs_pl_sfc_grib_source(): + return from_source("url", earthkit_remote_test_data_file("xr_engine/level/aifs-pl_sfc.grib")) + + +@pytest.fixture(scope="module") +def chem_cams_grib_source(): + return from_source("sample", "chem-cams.grib") + + +@pytest.fixture(scope="module") +def optical_cams_grib_source(): + return from_source("sample", "optical-cams.grib") + + +@pytest.fixture(scope="module") +def source( + request, + pl_grib_source, + pl_small_grib_source, + pl_sfc_grib_source, + quantiles_pd_grib_source, + wave_spectra_grib_source, + aifs_sfc_grib_source, + aifs_pl_sfc_grib_source, + chem_cams_grib_source, + optical_cams_grib_source, +): + if request.param == "pl.grib": + return pl_grib_source + elif request.param == "pl_small.grib": + return pl_small_grib_source + elif request.param == "pl_sfc.grib1": + return pl_sfc_grib_source + elif request.param == "quantiles_pd.grib": + return quantiles_pd_grib_source + elif request.param == "wave_spectra.grib": + return wave_spectra_grib_source + elif request.param == "aifs-sfc.grib": + return aifs_sfc_grib_source + elif request.param == "aifs-pl_sfc.grib": + return aifs_pl_sfc_grib_source + elif request.param == "chem-cams.grib": + return chem_cams_grib_source + elif request.param == "optical-cams.grib": + return optical_cams_grib_source + else: + raise ValueError(f"No fixture for {request.param}") + + @pytest.mark.cache @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @@ -47,8 +126,8 @@ ), ], ) -def test_xr_rename_dims(allow_holes, lazy_load, kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_rename_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, 
lazy_load=lazy_load, **kwargs) num = len(ds) @@ -111,8 +190,8 @@ def test_xr_rename_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_fixed_dims(allow_holes, lazy_load, kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_fixed_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, lazy_load=lazy_load, **kwargs) num = len(ds) @@ -171,8 +250,8 @@ def test_xr_fixed_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): - ds_ek = from_source("url", earthkit_remote_test_data_file("xr_engine/level/pl.grib")).to_fieldlist() +def test_xr_drop_dims(pl_grib_source, allow_holes, lazy_load, kwargs, dim_keys): + ds_ek = pl_grib_source.to_fieldlist() ds = ds_ek.to_xarray(allow_holes=allow_holes, lazy_load=lazy_load, **kwargs) num = len(ds) @@ -187,10 +266,9 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", { "metadata.shortName": "t", "metadata.dataDate": 20240603, @@ -228,7 +306,6 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", { "metadata.shortName": "t", "metadata.dataDate": 20240603, @@ -263,8 +340,10 @@ def test_xr_drop_dims(allow_holes, lazy_load, kwargs, dim_keys): ), ], ) -def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_ensure_dims( + pl_small_grib_source, allow_holes, 
lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs +): + ds0 = pl_small_grib_source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) @@ -282,10 +361,10 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -331,7 +410,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -369,7 +448,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", None, { "profile": "grib", @@ -408,7 +487,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "ens/quantiles_pd.grib", + "quantiles_pd.grib", None, { "profile": "grib", @@ -459,6 +538,7 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, "profile": "grib", "time_dims": "valid_time", "extra_dims": ["metadata.directionNumber", "metadata.frequencyNumber"], + "drop_dims": ["wave_direction", "wave_frequency"], "squeeze": False, "add_earthkit_attrs": False, }, @@ -489,9 +569,180 @@ def test_xr_ensure_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ], + indirect=["source"], +) +def test_xr_extra_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() + if sel: + ds0 = 
ds0.sel(**sel) + ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) + compare_coords(ds, coords) + compare_dims(ds, dims, sizes=True) + + for v in var_attrs: + v_attrs = dict(ds[v].attrs) + v_attrs.pop("_earthkit", None) + assert v_attrs == var_attrs[v] + assert ds.attrs == global_attrs + + +@pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +@pytest.mark.parametrize( + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", + [ + ( + "wave_spectra.grib", + None, + { + "profile": "grib", + "time_dims": "valid_time", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "wave_direction": [55.0, 115.0, 175.0, 235.0, 295.0, 355.0], + "wave_frequency": [0.034523, 0.1311, 0.497852], + "member": [0], + "valid_time": [pd.Timestamp("2025-12-10 00:00:00")], + "level": [0], + "level_type": ["meanSea"], + }, + { + "wave_direction": 6, + "wave_frequency": 3, + "member": 1, + "valid_time": 1, + "level": 1, + "level_type": 1, + }, + { + "2dfd": { + "standard_name": "unknown", + "long_name": "2D wave spectra (single)", + "units": "meter ** 2 * second / radian", + "typeOfLevel": "meanSea", + }, + "wave_direction": { + "standard_name": "sea_surface_wave_from_direction", + "long_name": "wave direction", + "units": "degrees", + }, + "wave_frequency": { + "standard_name": "sea_surface_wave_frequency", + "long_name": "wave frequency", + "units": "s-1", + }, + }, + {"Conventions": "CF-1.8", "institution": "ECMWF"}, + ), + ], + indirect=["source"], +) +def test_xr_wave_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() + if sel: + ds0 = ds0.sel(**sel) + ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) + compare_coords(ds, coords) + compare_dims(ds, dims, sizes=True) + + for v in var_attrs: + v_attrs = dict(ds[v].attrs) + v_attrs.pop("_earthkit", None) + assert v_attrs == var_attrs[v] + 
assert ds.attrs == global_attrs + + +@pytest.mark.cache +@pytest.mark.parametrize("allow_holes", [False, True]) +@pytest.mark.parametrize("lazy_load", [True, False]) +@pytest.mark.parametrize( + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", + [ + ( + "chem-cams.grib", + None, + { + "profile": "earthkit", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "forecast_reference_time": [datetime.datetime(2011, 9, 29, 0, 0)], + "step": [datetime.timedelta(0)], + "level": [1], + "level_type": ["hybrid"], + "chem": ["CO", "HCHO", "O3"], + }, + { + "forecast_reference_time": 1, + "step": 1, + "level": 1, + "level_type": 1, + "chem": 3, + }, + { + "mass_mixrat": { + "standard_name": "mass_fraction_of_carbon_monoxide_in_air", + "long_name": "Mass mixing ratio", + "units": "dimensionless", + "level_type": "hybrid", + }, + "chem": { + "long_name": "atmospheric chemical or physical constituent type", + }, + }, + {"Conventions": "CF-1.8", "institution": "ECMWF"}, + ), + ( + "optical-cams.grib", + None, + { + "profile": "earthkit", + "squeeze": False, + "add_earthkit_attrs": False, + }, + { + "forecast_reference_time": [datetime.datetime(2011, 9, 29, 0, 0)], + "step": [datetime.timedelta(0)], + "level": [0], + "level_type": ["surface"], + "chem": ["aer_sm", "aer_total"], + "wavelength": [550, 800], + }, + { + "forecast_reference_time": 1, + "step": 1, + "level": 1, + "level_type": 1, + "chem": 2, + "wavelength": 2, + }, + { + "aod": { + "standard_name": "unknown", + "long_name": "Aerosol optical depth", + "units": "Numeric", + "level_type": "surface", + }, + "chem": { + "long_name": "atmospheric chemical or physical constituent type", + }, + "wavelength": { + "standard_name": "radiation_wavelength", + "long_name": "wavelength", + "units": "nm", + }, + }, + {"Conventions": "CF-1.8", "institution": "ECMWF"}, + ), + ], + indirect=["source"], ) -def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = 
from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_chem_dims(source, allow_holes, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, allow_holes=allow_holes, **kwargs) @@ -508,10 +759,10 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, @pytest.mark.cache @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -538,7 +789,7 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", None, { "profile": "grib", @@ -575,7 +826,7 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {"gridType": "regular_ll"}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -608,7 +859,7 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {}, ), ( - "level/pl_sfc.grib1", + "pl_sfc.grib1", { "metadata.dataDate": 20240603, "metadata.dataTime": 0, @@ -646,9 +897,10 @@ def test_xr_extra_dims(allow_holes, lazy_load, path, sel, kwargs, coords, dims, {}, ), ], + indirect=["source"], ) -def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_engine_level_per_type_dim(source, lazy_load, sel, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) ds = ds0.to_xarray(lazy_load=lazy_load, **kwargs) @@ -666,10 +918,10 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, 
sel, kwargs, coords, dims @pytest.mark.parametrize("allow_holes", [False, True]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/pl_small.grib", + "pl_small.grib", { "metadata.shortName": ["t", "r"], "metadata.dataDate": 20240603, @@ -707,7 +959,7 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ( - "level/pl_small.grib", + "pl_small.grib", { "metadata.shortName": ["t", "r"], "metadata.dataDate": 20240603, @@ -747,11 +999,12 @@ def test_xr_engine_level_per_type_dim(lazy_load, path, sel, kwargs, coords, dims {"Conventions": "CF-1.8", "institution": "ECMWF"}, ), ], + indirect=["source"], ) def test_xr_engine_dims_as_attrs_1( - allow_holes, lazy_load, path, sel, idx, kwargs, coords, dims, var_attrs, global_attrs + source, allow_holes, lazy_load, sel, idx, kwargs, coords, dims, var_attrs, global_attrs ): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) if idx: @@ -770,10 +1023,10 @@ def test_xr_engine_dims_as_attrs_1( @pytest.mark.cache @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( - "path,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", + "source,sel,idx,kwargs,coords,dims,var_attrs,global_attrs", [ ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -800,7 +1053,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -829,7 +1082,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -856,7 +1109,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -885,7 +1138,7 @@ def 
test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-sfc.grib", + "aifs-sfc.grib", None, None, { @@ -912,7 +1165,7 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ( - "level/aifs-pl_sfc.grib", + "aifs-pl_sfc.grib", None, None, { @@ -941,9 +1194,10 @@ def test_xr_engine_dims_as_attrs_1( {}, ), ], + indirect=["source"], ) -def test_xr_engine_dims_as_attrs2(lazy_load, path, sel, idx, kwargs, coords, dims, var_attrs, global_attrs): - ds0 = from_source("url", earthkit_remote_test_data_file("xr_engine", path)).to_fieldlist() +def test_xr_engine_dims_as_attrs2(source, lazy_load, sel, idx, kwargs, coords, dims, var_attrs, global_attrs): + ds0 = source.to_fieldlist() if sel: ds0 = ds0.sel(**sel) if idx: