diff --git a/docs/source/how-tos/grib/grib_modify_metadata.ipynb b/docs/source/how-tos/grib/grib_modify_metadata.ipynb index a6483e97..4c3ae7a0 100644 --- a/docs/source/how-tos/grib/grib_modify_metadata.ipynb +++ b/docs/source/how-tos/grib/grib_modify_metadata.ipynb @@ -3,7 +3,13 @@ { "cell_type": "markdown", "id": "5d21c14f-b851-4437-a1f1-1daac37a9e49", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "# GRIB: modifying metadata" ] @@ -11,29 +17,35 @@ { "cell_type": "markdown", "id": "ac126208-e64e-4658-b69a-83e9b387464b", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "This notebook demonstrates how to modify the metadata in GRIB fields.\n", "\n", "First we read some GRIB data containing pressure level fields." ] }, - { - "cell_type": "markdown", - "id": "0edd73c7-9358-47cd-96b4-cf1e0d1cb720", - "metadata": {}, - "source": [] - }, { "cell_type": "code", "execution_count": 1, "id": "c11cc522-d388-4f39-a2d4-2f7b4c03517e", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "01b91013e30e499da1ed373f75523982", + "model_id": "6bb5a5212f7742f6bd69894ec6a167b3", "version_major": 2, "version_minor": 0 }, @@ -262,7 +274,7 @@ "tags": [] }, "source": [ - "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. the field was created from GRIB data so it has the raw metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." + "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. 
since the field was created from GRIB data it has the raw (GRIB) metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." ] }, { @@ -319,7 +331,7 @@ "tags": [] }, "source": [ - "Setting keys for the time field component allows using multiple formats. By default a \"datetime\" key takes a datatime.datetime object and a \"step\" key takes a datatime.timedelta object." + "Setting keys for the \"time\" field component allows using multiple formats. By default a \"datetime\" key takes a datetime.datetime object and a \"step\" key takes a datetime.timedelta object." ] }, { @@ -591,7 +603,7 @@ "tags": [] }, "source": [ - "It is allowed to set individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." + "It is allowed to set whole individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." ] }, { @@ -793,8 +805,199 @@ }, { "cell_type": "markdown", - "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", + "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", + "metadata": {}, + "source": [ + "## Modified fields and the associated GRIB message" + ] + }, + { + "cell_type": "raw", + "id": "56528846-a9e1-43a8-a299-e33b64aabd55", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field was created from GRIB data the associated GRIB message can be accessed via the field with :func:`~earthkit.data.core.field.Field.message`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e403ad78-821d-4b14-8cae-4b2afef55093", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "b'GRIB\\x00\\x00\\x96\\x01\\x00\\x00'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f.message()[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "d9aa4f53-01e1-46de-a0c1-ebb87970cf38", + "metadata": {}, + "source": [ + "Having modified the field metadata this GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8cddbcda-ea5c-4482-baa7-3535f94752a6", "metadata": {}, + "outputs": [], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.message()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8750f04b-b106-493b-96ad-2c02c413137d", + "metadata": {}, + "outputs": [], + "source": [ + "f1.get(\"metadata.shortName\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5faa2075-b6ae-48a1-a07b-0b83055459e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Key metadata.shortName not found in field'\n" + ] + } + ], + "source": [ + "try:\n", + " f1.metadata(\"shortName\")\n", + "except KeyError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "raw", + "id": "1c32e5c7-fc47-491b-bf2b-476512791831", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If we want to keep a valid associated GRIB message in the modified field we need to call :func:`~earthkit.data.core.field.Field.sync`. This will create a new GRIB handle, update the relevant metadata in it and create a new field out of it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5a2a63ec-0d95-4b3e-ba13-039f09234c81", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['t', 500]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f1.sync()\n", + "f1.get([\"metadata.shortName\", \"metadata.level\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f17ffe64-45d4-4bdc-a174-26e8a4fa28d7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['t', 500]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1.metadata([\"shortName\", \"level\"])" + ] + }, + { + "cell_type": "raw", + "id": "af6311d3-74b0-4793-976e-d02a907f5113", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Alternatively, if your workflow is strictly GRIB-bound you can carry out the field modification via the :py:class:`~earthkit.data.encoders.grib.GribEncoder` as shown in the next chapter. 
" + ] + }, + { + "cell_type": "markdown", + "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## Changing raw GRIB metadata" ] @@ -817,7 +1020,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "id": "82e780fe-3539-4af9-ad4f-585739577dcc", "metadata": { "editable": true, @@ -882,7 +1085,7 @@ "0 1000 pressure 0 regular_ll " ] }, - "execution_count": 11, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -897,8 +1100,14 @@ { "cell_type": "code", "execution_count": null, - "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", - "metadata": {}, + "id": "60b11297-7499-445d-9b71-2d59709aed25", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [] } diff --git a/docs/source/how-tos/grib/grib_modify_values.ipynb b/docs/source/how-tos/grib/grib_modify_values.ipynb index 2d49be61..a63b4f54 100644 --- a/docs/source/how-tos/grib/grib_modify_values.ipynb +++ b/docs/source/how-tos/grib/grib_modify_values.ipynb @@ -39,7 +39,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e39683874919433bbac0194d1c67e7b5", + "model_id": "0a3b6df9fe9e40d8a214a4d43332f8fa", "version_major": 2, "version_minor": 0 }, @@ -462,6 +462,111 @@ "t1_w = ekd.from_source(\"file\", \"_res_3.grib\").to_fieldlist()\n", "print(t[0].values.max(), t1_w[0].values.max())" ] + }, + { + "cell_type": "markdown", + "id": "09e4a417-a3be-42a0-977a-2053ab67c3e9", + "metadata": {}, + "source": [ + "## Modified fields and the associated GRIB message" + ] + }, + { + "cell_type": "raw", + "id": "5054e101-5208-4a8a-945b-67c048372c05", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field is created from GRIB data the associated GRIB message can be accessed via the field 
using :func:`~earthkit.data.field.Field.message`. Having modified the field values this GRIB message is still available and the data that :func:`~earthkit.data.field.Field.message` returns will contain the updated values.\n", + "\n", + "The following example demonstrates this by creating new fields from the messages in the original and modified fields and comparing their values." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5c405417-c51c-4fa3-8c37-847c54e36163", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(np.float64(272.5641784667969), np.float64(273.5641784667969))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from earthkit.data.field.grib.create import create_grib_field_from_message\n", + "\n", + "f = fl[0]\n", + "f1 = f.set(values=f.values + 1)\n", + "\n", + "f_m = create_grib_field_from_message(f.message())\n", + "f1_m = create_grib_field_from_message(f1.message())\n", + "\n", + "f_m.values[0], f1_m.values[1]" + ] + }, + { + "cell_type": "markdown", + "id": "3c3ca0c5-fb30-4134-86f5-e90c12f4960d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If the metadata is also modified the associated GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a29ae7ac-4617-48c8-9176-98ad89c1ceec", + "metadata": {}, + "outputs": [], + "source": [ + "f = fl[0]\n", + "f1 = f.set({\"values\": f.values + 1, \"vertical.level\": 850})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b21a3608-693d-43ac-b851-ebd9998306c6", + "metadata": {}, + "outputs": [], + "source": [ + "f1.message()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "74cb432d-797a-4685-9415-fa2583f67e4e", + "metadata": {}, + "outputs": [], + "source": [ + "f1.get(\"metadata.shortName\")" + ] } ], "metadata": { diff --git a/docs/source/how-tos/target/grib_encoder.ipynb b/docs/source/how-tos/target/grib_encoder.ipynb index 590d8e46..6031fd34 100644 --- a/docs/source/how-tos/target/grib_encoder.ipynb +++ b/docs/source/how-tos/target/grib_encoder.ipynb @@ -60,7 +60,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a2c2ac767de74fdb8f218c7243f31b2c", + "model_id": "cf241aa38f354b9db40f7e486bf0edc2", "version_major": 2, "version_minor": 0 }, @@ -152,7 +152,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -178,7 +178,7 @@ "tags": [] }, "source": [ - "The method to call is :meth:`GribEncoder.encode`. The ``template`` argument can be a GRIB field. In the example below :meth:`GribEncoder.encode` will simply create a copy (clone) of the underlying GRIB handle in the field." + "The method to call is :meth:`~earthkit.data.encoders.grib.GribEncoder.encode`. The ``template`` argument can be a GRIB field. In the example below :meth:`~earthkit.data.encoders.grib.GribEncoder.encode` will simply create a copy (clone) of the underlying GRIB handle in the field." 
] }, { @@ -196,7 +196,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -247,6 +247,7 @@ } ], "source": [ + "# get message as bytes\n", "r.to_bytes()[:10]" ] }, @@ -323,6 +324,7 @@ } ], "source": [ + "# convert to field\n", "f = r.to_field()\n", "f.ls()" ] @@ -334,6 +336,7 @@ "metadata": {}, "outputs": [], "source": [ + "# write into a file\n", "with open(\"_res_encoded.grib\", \"wb\") as out:\n", " r.to_file(out)" ] @@ -924,7 +927,7 @@ { "data": { "text/plain": [ - "(np.float64(4.1302121976172), np.float64(4.130106449127197))" + "(np.float64(4.244827050861318), np.float64(4.244751930236816))" ] }, "execution_count": 15, @@ -1331,20 +1334,6 @@ "for d in encoder.encode(data=ds, step=18):\n", " print(d.to_field().ls(keys=[\"metadata.shortName\", \"metadata.step\"]))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd24efbb-9c27-47a0-870e-43e1e21b5cc3", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb index df08a0c5..3f698e6d 100644 --- a/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb +++ b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb @@ -29,7 +29,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d1c0f177bba543729b8ec1b4a7ceaeff", + "model_id": "97df2cbbbd9a49148b26e5c67a9ada59", "version_major": 2, "version_minor": 0 }, diff --git a/docs/source/how-tos/target/grib_to_file_target.ipynb b/docs/source/how-tos/target/grib_to_file_target.ipynb index 8264b272..809c6938 100644 --- a/docs/source/how-tos/target/grib_to_file_target.ipynb +++ b/docs/source/how-tos/target/grib_to_file_target.ipynb @@ -29,7 +29,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "37999b81f3cf4e33a49aea96cc3d2d5a", + "model_id": 
"258dac7b98e541a18ffc86acd4561001", "version_major": 2, "version_minor": 0 }, diff --git a/docs/source/release-notes/migration_1.0.0.rst b/docs/source/release-notes/migration_1.0.0.rst index c9d5da2b..c5110172 100644 --- a/docs/source/release-notes/migration_1.0.0.rst +++ b/docs/source/release-notes/migration_1.0.0.rst @@ -129,6 +129,13 @@ Raw metadata keys are still available but they are only accessible either by usi f.metadata("shortName") f.metadata("metadata.shortName") +Field modification +++++++++++++++++++++++++ + +Fields can be modified using the :py:meth:`~earthkit.data.core.field.Field.set` method. This method allows to set new data values and/or change metadata keys. See the notebook examples: + +- :ref:`/how-tos/grib/grib_modify_metadata.ipynb` +- :ref:`/how-tos/grib/grib_modify_values.ipynb` Field arithmetic ++++++++++++++++++++++++ @@ -319,8 +326,9 @@ The following table gives an overview of the changes in the Fieldlist API: Xarray engine ------------------ -The Xarray engine has been refactored and many of the internal classes and methods have been changed. The following table gives an overview of the changes in the Xarray engine: +The Xarray engine has been refactored and many of the internal classes and methods have been changed. The following list gives an overview of the changes in the Xarray engine: - a new default profile :ref:`earthkit ` has been added which is used when no profile is specified. This profile is designed to work with the new format independent metadata keys from :py:class:`~earthkit.data.core.field.Field` to generate the Xarray dataset. - the old :ref:`mars ` and :ref:`grib ` profiles were kept but they are now using some of the new format independent metadata keys to generate the Xarray dataset. - the "number" ``dim_role`` was renamed to "member" in line with the new format independent metadata keys. See: :ref:`xr_dim` for more details. 
+- the ``time_dim_mode`` kwarg in :func:`to_xarray` was replaced by ``time_dims`` and the meaning of some temporal dimensions in the ``dim_roles`` also changed. See :ref:`xr_time_dims` for more details. diff --git a/docs/source/skip_api_rules.py b/docs/source/skip_api_rules.py index f3b5743d..a8ed88b7 100644 --- a/docs/source/skip_api_rules.py +++ b/docs/source/skip_api_rules.py @@ -27,6 +27,7 @@ def _ends_with_any(name, suffixes): "sources.SourceMaker", ], "data": [ + "encoders.grib.encoder", "sources.get_source", ], "method": [ @@ -43,7 +44,9 @@ def _skip_api_items(app, what, name, obj, skip, options): # if "ArrayLike" in name: # print(f"Skipping {what} {name} {obj}") - if what == "data" and ".ArrayLike" in name: + if name.endswith(".LOG"): + skip = True + elif what == "data" and ".ArrayLike" in name: skip = True else: s = _SKIP.get(what, []) diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index e7208a16..9369b6dc 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -349,6 +349,55 @@ def from_field( v.sync(r) return r + def _from_set( + self, + data=None, + time=None, + parameter=None, + geography=None, + vertical=None, + ensemble=None, + proc=None, + labels=None, + ): + _kwargs = { + _DATA: data, + _TIME: time, + _PARAMETER: parameter, + _GEOGRAPHY: geography, + _VERTICAL: vertical, + _ENSEMBLE: ensemble, + _PROC: proc, + _LABELS: labels, + } + + has_new_metadata = False + for name in Field._COMPONENT_NAMES: + v = _kwargs[name] + if v is not None: + _kwargs[name] = _COMPONENT_MAKER.default_cls(name).from_any(v) + if name != "data" and name != "labels": + has_new_metadata = True + else: + _kwargs[name] = self._components[name] + + r = self.__class__(**_kwargs) + + # copy private data and initialize + if self._private: + r._private = self._private.copy() + if has_new_metadata: + for k in list(r._private.keys()): + v = r._private[k] + if not k.startswith("_"): + r._private.pop(k) + k = f"_{k}" + if 
hasattr(v, "sync"): + v = v.sync(r) + r._private[k] = v + + return r + @classmethod def from_dict(cls, d): r"""Create a Field from a dictionary. @@ -826,25 +875,26 @@ def _get_single(self, key, default=None, astype=None, raise_on_missing=False): if component: return component.get(key_name, default=default, astype=astype, raise_on_missing=raise_on_missing) elif component_name == _METADATA: - for _, private_component in self._private.items(): - if hasattr(private_component, "metadata"): - return private_component.metadata( - key_name, default=default, astype=astype, raise_on_missing=raise_on_missing - ) - else: + for p_name, p_component in self._private.items(): + if not p_name.startswith("_"): + if hasattr(p_component, "metadata"): + return p_component.metadata( + key_name, default=default, astype=astype, raise_on_missing=raise_on_missing + ) + else: - def _cast(v): - if callable(astype): - try: - return astype(v) - except Exception: - return None - return v + def _cast(v): + if callable(astype): + try: + return astype(v) + except Exception: + return None + return v - # TODO: review this - v = self.private_component.get(key) - if v is not None: - return _cast(v) + # TODO: review this + v = p_component.get(key) + if v is not None: + return _cast(v) if raise_on_missing: raise KeyError(f"Key {key} not found in field") @@ -1267,7 +1317,7 @@ def set(self, *args, **kwargs): >>> field.set({"parameter.variable": "t"}) >>> field.set({"parameter.variable": "t", "vertical.level": 1000}) - New data values can be set by using the "data" or "values" key with the new values + New data values can be set by using the "data" or "values" key with the new values as a value. For example, >>> field.set(data=new_values_array) @@ -1314,6 +1364,44 @@ def set(self, *args, **kwargs): Field A new field with the specified metadata keys set to the given values. 
+ + Notes + ----- + When the field is created from a GRIB message, calling :meth:`set` copies the associated + GRIB message into the new field without any modifications. Since it is now out of sync with the + new field's components, the new field will not provide access to any GRIB metadata + either via :meth:`get` or via :meth:`metadata`. Additionally, when calling + :meth:`message` on the new field, None is returned. (Use :meth:`sync` to synchronize the + associated GRIB message to the new field and expose the GRIB metadata keys again). + + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set({"parameter.variable": "msl", "parameter.units": "Pa"}) + >>> f1.get("metadata.shortName") + None + >>> f1.metadata("shortName") + KeyError: 'Key metadata.shortName not found in field' + >>> f1.message() + None + + However, if only the labels or the values are set (the latter via the "data" or "values" keys), the new + field returned by :meth:`set` is still linked to the original GRIB message and the GRIB specific keys + in the raw metadata are still accessible. If the values were modified, :meth:`message` will return + the original GRIB message updated with the modified data values. 
+ + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set(values=f.values + 1) + >>> f1.get("metadata.shortName") + "2t" + >>> f1.metadata("shortName") + "2t" + >>> f1.message() + + + Examples -------- See the how-to examples for the :meth:`set` method in the following notebook: @@ -1372,7 +1460,7 @@ def set(self, *args, **kwargs): _components[component_name] = s if _components: - return Field.from_field(self, **_components) + return self._from_set(**_components) elif kwargs: raise ValueError("No valid keys to set in the field.") @@ -1382,6 +1470,50 @@ def _set_values(self, array): data = self._components[_DATA].set_values(array) return Field.from_field(self, data=data) + def sync(self): + """Return a field with the raw metadata in sync with the field's components. + + When a field is created from a GRIB message, it stores this associated GRIB message/handle + and the raw GRIB metadata is extracted from it e.g. when calling :meth:`get`. When the field's + components are modified using :meth:`set`, the GRIB message is copied into the new field but not + modified. Since it is now out of sync with the new field's components, the new field will + not provide access to any GRIB metadata either via :meth:`get` or via :meth:`metadata`. When + :meth:`sync` is called on such a field the GRIB message is re-encoded using the field's components + and the raw GRIB metadata will become available again and in sync with the field's components. + + + Returns + ------- + Field + A field with the raw metadata in sync with the field's components. If the field is not associated with + a GRIB message or if the raw metadata is already in sync, the original field is returned. 
+ + Examples + -------- + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set({"parameter.variable": "msl", "parameter.units": "Pa"}) + >>> f1.get("metadata.shortName") + None + >>> f1.metadata("shortName") + KeyError: 'metadata.shortName' not found in field + >>> f2 = f1.sync() + >>> f2.get("metadata.shortName") + 'msl' + >>> f2.metadata("shortName") + 'msl' + """ + if self._get_grib() and self._private and "_metadata" in self._private: + from earthkit.data.encoders.grib import GribEncoder + + encoder = GribEncoder() + f = encoder.encode(data=self).to_field() + if self.labels: + f = f.set(labels=self.labels) + return f + return self + def to_target(self, target, *args, **kwargs): r"""Write the field into a target object. @@ -1689,15 +1821,19 @@ def tail(self, *args, **kwargs): return self.ls("tail", *args, **kwargs) def message(self): - r"""Return a buffer containing the encoded message for Fields generated from a message based format (e.g. GRIB). + r"""Return a buffer containing the encoded message associated with the field. + + Only available for fields generated from a message based format (e.g. GRIB). + Once the field metadata is modified by calling :meth:`set` the link to the original + message is lost and this method will return None. 
Returns ------- bytes """ - grib = self._get_grib() + grib = self._get_grib(strict=True) if grib is not None: - return grib.message() + return grib.message(owner=self) return None def _dispatch_to_fieldlist_method(self, method_name, *args, **kwargs): @@ -1710,10 +1846,15 @@ def _set_private_data(self, name, data): def _get_private_data(self, name): return self._private.get(name) - def _get_grib(self): - if self._private and "metadata" in self._private and getattr(self._private["metadata"], "NAME", None) == _GRIB: - return self._private["metadata"] - + def _get_grib(self, strict=False): + r"""Return the GRIB metadata object associated with the field if available.""" + if self._private: + names = ["metadata", "_metadata"] if not strict else ["metadata"] + for name in names: + if name in self._private: + md = self._private[name] + if getattr(md, "NAME", None) == _GRIB: + return md return None def _check(self): @@ -1744,74 +1885,6 @@ def _normalise_key_values(**kwargs): r"""Normalise the selection input for :meth:`FieldList.sel`.""" return kwargs - # def sel(self, *args, **kwargs): - # r"""Check if a field matches the given selection criteria. - - # Parameters - # ---------- - # *args: tuple - # Positional arguments specifying the filter conditions as a dict. - # Both single or multiple keys are allowed to use. When multiple filter conditions - # are specified, they are combined with a logical AND operator. Each metadata key in - # the filter conditions can specify the following type of filter values: - - # - single value:: - - # f.sel({parameter.variable: "t"}) - - # - list of values:: - - # f.sel({parameter.variable: ["u", "v"]}) - - # - slice of values (defines a closed interval, so treated as inclusive of both the start - # and stop values, unlike normal Python indexing). 
The following example filters the fields - # with "vertical.level" between 300 and 500 inclusively:: - - # f.sel({vertical.level: slice(300, 500)}) - - # Date and time related keys from the "time" field component are automatically normalised - # for comparison. This is also applied to the following keys from the - # raw metadata: "metadata.base_datetime", "metadata.valid_datetime" and "metadata.step_timedelta". - - # For example, when filtering by "time.valid_datetime" the following calls are equivalent: - - # >>> f.sel({ "time.valid_datetime": "2018-08-01T12:00:00"}) - # >>> f.sel({ "time.valid_datetime": "2018080112"}) - # >>> f.sel({ "time.valid_datetime": 2018080112}) - # >>> f.sel({ "time.valid_datetime": datetime(2018, 8, 1, 12, 0) }) - - # Similarly, when filtering by "time.step" the following calls are equivalent (values are assumed - # to be in hours when the unit is not specified): - - # >>> f.sel({ "time.step": "6h"}) - # >>> f.sel({ "time.step": 6}) - # >>> f.sel({ "time.step": "360m"}) - # >>> f.sel({ "time.step": timedelta(hours=6)}) - - # remapping: dict - # Define new metadata keys from existing ones to use in ``*args`` and ``**kwargs``. - # E.g. to define a new key "param_level" as the concatenated value of - # the "parameter.variable" and "vertical.level" keys use:: - - # >>> remapping={"param_level": "{parameter.variable}{vertical.level}"} - - # **kwargs: dict, optional - # Other keyword arguments specifying the filter conditions. - - # Returns - # ------- - # Field or None - # Returns the field itself if it matches the selection criteria, otherwise returns None. 
- # """ - # res = self._dispatch_to_fieldlist_method("sel", *args, **kwargs) - - # if res and len(res) == 1: - # return self - # return None - - # def order_by(self, *args, **kwargs): - # pass - def _unary_op(self, oper): v = oper(self.values) r = self._set_values(v) diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index 60d093e5..2701fd1f 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -19,38 +19,68 @@ LOG = logging.getLogger(__name__) -NOT_IN_EDITION_1 = ( +_NOT_IN_EDITION_1 = ( "productDefinitionTemplateNumber", "typeOfGeneratingProcess", ) -COMPULSORY = (("date", "referenceDate"), ("param", "paramId", "shortName")) +_COMPULSORY = (("date", "referenceDate"), ("param", "paramId", "shortName")) class GribEncodedData(EncodedData): + """The object representing the encoded GRIB message.""" + def __init__(self, handle): + """Initialize the GribEncodedData object. + + Parameters + ---------- + handle: GribCodesHandle + The handle to the GRIB message. + """ self.handle = handle def to_bytes(self): + """Return the GRIB message as bytes.""" return self.handle.get_buffer() def to_file(self, f): + """Write the GRIB message to a file. + + Parameters + ---------- + f: file-like object + The file-like object to write the GRIB message to. + + """ self.handle.write(f) def get(self, key, default=None): - if key: + """Get a value from the GRIB message metadata. + + Parameters + ---------- + key: str + The key of the metadata to retrieve. + default: any, optional + The default value to return if the key is not found. + + Returns + ------- + The value associated with the key, or the default value if the key is not found. 
+ """ + if key.startswith("metadata."): + return self.handle.get(key[9:], default=default) + elif key: return self.to_field().get(key, default=default) else: raise NotImplementedError def to_field(self): - # from earthkit.data.readers.grib.memory import GribFieldInMemory - - # return GribFieldInMemory.from_buffer(self.to_bytes()) - - from earthkit.data.field.grib.create import create_grib_field_from_buffer + """Convert the GRIB message to a Field object.""" + from earthkit.data.field.grib.create import create_grib_field_from_message - return create_grib_field_from_buffer(self.to_bytes()) + return create_grib_field_from_message(self.to_bytes()) class Combined: @@ -107,7 +137,7 @@ class GribHandleMaker: """Create a new GribCodesHandle from a template, field or metadata.""" def __init__(self, template=None): - self.template = template + self.template = self.handle_from_template(template, clone=False) self._bbox = {} def make(self, values=None, metadata=None, template=None): @@ -124,40 +154,65 @@ def make(self, values=None, metadata=None, template=None): template: GribCoder A template to use for encoding """ - handle = self.handle_from_template(template) + if template is None: + template = self.template + + handle = self.handle_from_template(template, clone=True) if handle is not None: self.update_metadata_from_template(metadata, template, handle) - # if handle is None and field is not None: - # handle = self.handle_from_field(field) - # if handle is not None: - # self.update_metadata_from_template(metadata, field, handle) - if handle is None: if values is None: raise ValueError("No values to encode") - handle = self.handle_from_metadata(values, metadata, COMPULSORY) + handle = self.handle_from_metadata(values, metadata, _COMPULSORY) return handle - def handle_from_template(self, template): + @staticmethod + def handle_from_template(template, clone=True): handle = None - if template is None: - template = self.template - if template is not None: from 
earthkit.data.core.field import Field + def _result(handle): + return handle.clone() if clone else handle + if isinstance(template, Field): - return self.handle_from_field(template) + return GribHandleMaker.handle_from_field(template) + # GribMetadata or GribHandle elif hasattr(template, "handle"): handle = template.handle if handle is not None: - return handle.clone() + return _result(handle) + else: + from earthkit.data.readers.grib.handle import GribCodesHandle + + if isinstance(template, GribCodesHandle): + return _result(template) + + # message buffer as bytes + elif isinstance(template, bytes): + handle = GribCodesHandle.from_message(template) + if handle is not None: + return _result(handle) + # GRIB sample as string + elif isinstance(template, str): + handle = GribCodesHandle.from_sample(template) + if handle is not None: + return _result(handle) + # raw ecCodes handle + elif isinstance(template, int): + try: + handle = GribCodesHandle._from_raw_handle(template) + if handle is not None: + return _result(handle) + except Exception: + pass return None - def handle_from_field(self, field): + @staticmethod + def handle_from_field(field): r = {} field._get_grib_context(r) handle = r.pop("handle", None) @@ -198,7 +253,7 @@ def handle_from_metadata(self, values, metadata, compulsory): def update_metadata_from_template(self, metadata, template, handle): return - + # TODO: review this code # the template can contain extra metadata that is not encoded in the handle if "bitsPerValue" in metadata: return @@ -212,34 +267,6 @@ def update_metadata_from_template(self, metadata, template, handle): else: bpv = template_md.get("bitsPerValue", default=None) - # if bpv is None: - - # if hasattr(template, "metadata"): - # template_md = template.metadata() - # from earthkit.data.core.metadata import WrappedMetadata - - # if isinstance(template_md, WrappedMetadata): - # for k in template_md.extra.keys(): - # if k != "bitsPerValue" and k not in metadata: - # metadata[k] = 
template_md.get(k) - - # if "bitsPerValue" not in metadata: - # bpv = template.metadata("bitsPerValue", default=None) - - # # Either the handle has valid bitsPerValue or has to be extracted - # # from the template and added to the metadata to be encoded - # if "bitsPerValue" not in metadata: - # if bpv is None: - # try: - # bpv = template.handle.get("bitsPerValue", None) - # except Exception: - # bpv = None - - # if bpv is not None and bpv > 0: - # bpv_h = handle.get("bitsPerValue", None) - # if bpv != bpv_h: - # metadata["bitsPerValue"] = bpv - def _ll_field(self, values, metadata): Nj, Ni = values.shape metadata["Nj"] = Nj @@ -342,11 +369,73 @@ def _gg_field(self, values, metadata): class GribEncoder(Encoder): - """Encode GRIB data.""" - - def __init__(self, **kwargs): - super().__init__(**kwargs) + """Encoder for GRIB format. + + This class is used to encode data to GRIB format via the :meth:`encode` method. + + Parameters + ---------- + template: Field, GribCodesHandle, bytes, str, int, None + A preset template to use for encoding when :meth:`encode` is called without a template. + It can be a :py:class:`~earthkit.data.core.field.Field`, + a :py:class:`~earthkit.data.reader.grib.GribCodesHandle`, a GRIB message as + bytes, an ecCodes GRIB sample name as string, a raw ecCodes handle as an integer, or None. + See :meth:`encode` for more details on how the template is used. + metadata: dict + A preset metadata to encode. The keys must be ecCodes GRIB keys, optionally prefixed with "metadata.". + This metadata is used as default when :meth:`encode` is called without metadata. If metadata is provided + in the :meth:`encode` method, it is merged with this preset metadata, with the metadata provided + in the :meth:`encode` method taking precedence. + kwargs: dict + Additional keyword arguments interpreted as metadata to encode. The keys must be ecCodes GRIB keys, + optionally prefixed with "metadata.". 
+ + Examples + -------- + See the howto examples for more details and examples of encoding GRIB data with :class:`GribEncoder`. + + - :ref:`/how-tos/target/grib_encoder.ipynb` + - :ref:`/how-tos/grib/grib_modify_metadata.ipynb` + - :ref:`/how-tos/grib/grib_modify_values.ipynb` + + Using with a preset template and metadata: + + >>> import earthkit.data as ekd + >>> template = ekd.from_source("sample", "test.grid").to_fieldlist()[0] + >>> template.get("metadata.shortName") + '2t' + >>> encoder = GribEncoder(template=template, metadata={"shortName": "msl"}) + >>> d = encoder.encode(values=template.values + 1.0, step=6) + >>> f = d.to_field() + >>> f.get("parameter.variable") + 'msl' + >>> f.get("parameter.shortName") + 'msl' + >>> f.get("parameter.units") + 'hPa' + >>> f.get("step") + 6 + + Using without preset template and metadata: + >>> encoder = GribEncoder() + >>> d = encoder.encode(values=template.values + 1.0, metadata={"shortName": "msl"}, step=6) + >>> f = d.to_field() + >>> f.get("parameter.variable") + 'msl' + >>> f.get("parameter.shortName") + 'msl' + >>> f.get("parameter.units") + 'hPa' + >>> f.get("step") + 6 + """ + + def __init__(self, template=None, metadata=None, **kwargs): + super().__init__(template=template, metadata=metadata, **kwargs) self._bbox = {} + # the template is stored as a handle to be used as a basis for encoding, + # (when available) + self.template = GribHandleMaker.handle_from_template(self.template, clone=False) @normalise_grib_keys @normalise("date", "date") @@ -376,24 +465,68 @@ def encode( **kwargs, ): """ + Encode new GRIB message(s). + Parameters ---------- - data: Field - The data to encode - values: numpy.ndarray - The values to encode + data: Field, FieldList, Numpy array, or None + The data to encode. Can be a :py:class:`~earthkit.data.core.field.Field`, a + :py:class:`~earthkit.data.core.fieldlist.FieldList`, a Numpy array, or None. 
+ If None, the new GRIB message(s) will be created from the ``values``, ``metadata`` and + ``template``. Cannot be specified together with ``values`` and ``template``. + When ``template`` is also provided, it will be used as a basis for encoding, + but the values will be taken from ``data``. + values: numpy.ndarray or None + The values to encode. If None, the values will be taken from the ``data`` or + ``template``. Takes precedence over the values in ``data`` or ``template`` + if any of them are provided. If the values contain NaNs, they will be replaced with the + ``missing_value`` provided. Cannot be specified together with ``data`` and ``template``. check_nans: bool - Check for NaNs in the values and replace them with missing_value + Check for NaNs in the values and replace them with ``missing_value``. metadata: dict - Metadata to encode - template: GribCoder - A template to use for encoding - return_bytes: bool - Return the encoded message as bytes + Metadata to encode. The keys must be ecCodes GRIB keys, optionally prefixed with "metadata.". + template: Field, GribCodesHandle, bytes, str, int, None + A template to use for encoding. It can be a :py:class:`~earthkit.data.core.field.Field`, + a :py:class:`~earthkit.data.reader.grib.GribCodesHandle`, a GRIB message as + bytes, an ecCodes GRIB sample name as string, a raw ecCodes handle as an integer, or None. If None, + the :obj:`GribEncoder.template` will be used if provided. Otherwise a new handle will be + created from the ``data``, ``values`` and ``metadata``. Takes precedence over the + ``data`` in forming the new GRIB message, but values are taken from the ``data`` if not + provided directly. Cannot be specified together with ``data`` and ``values``. missing_value: float - The value to use for NaNs + The value to use for NaNs. Default is 9999, which is the default missing value used by ecCodes + when encoding with a template that does not have a valid "bitsPerValue" key.
kwargs: dict - Additional metadata to encode + Additional metadata to encode. + + Returns + ------- + :obj:`GribEncodedData` or generator of :obj:`GribEncodedData` + The object representing the encoded GRIB message(s). When a single GRIB message is encoded, + a :obj:`GribEncodedData` object is returned. When multiple GRIB messages are encoded, a generator + of :obj:`GribEncodedData` objects is returned that can be processed in a loop. + + Notes + ----- + ``data``, ``values`` and ``template`` cannot all be specified together. If two of them + are provided, the following rules apply: + + - ``data``, ``values``: The values will be taken from the ``values`` argument. + - ``data``, ``template``: The ``template`` will be used as a basis for encoding, but + the values will be taken from the ``data`` argument. + - ``values``, ``template``: The ``template`` will be used as a basis for encoding, but + the values will be taken from the ``values`` argument. + + When no ``data`` and ``template`` are provided, a new GRIB message will be created from the + ``values`` and ``metadata``. This is an experimental feature and only works for certain metadata + keys and the grid has to be either global lat-lon or reduced Gaussian grid. The geography is + inferred from the shape of the specified ``values``. + + Examples + -------- + See the howto examples for more details and examples of encoding GRIB data with :class:`GribEncoder`.
+ + - :ref:`/how-tos/target/grib_encoder.ipynb` """ if template is None: template = self.template @@ -462,6 +595,7 @@ def _encode_field(self, field, *, target=None, values=None, template=None, metad r = {} field._get_grib_context(r) + handle = r.pop("handle", None) field_values = r.pop("values", None) @@ -494,7 +628,7 @@ def _encode_field(self, field, *, target=None, values=None, template=None, metad if values is None: values = field_values - if values is None and template: + if values is None: values = field.values if template is None: @@ -538,7 +672,7 @@ def _make_message( if metadata is None: metadata = {} - compulsory = COMPULSORY + compulsory = _COMPULSORY self._update_metadata(handle, metadata, compulsory, can_infer_time) @@ -559,7 +693,7 @@ def _make_message( metadata["bitmapPresent"] = 1 if str(metadata.get("edition")) == "1": - for k in NOT_IN_EDITION_1: + for k in _NOT_IN_EDITION_1: metadata.pop(k, None) if int(metadata.get("deleteLocalDefinition", 0)): diff --git a/src/earthkit/data/field/grib/create.py b/src/earthkit/data/field/grib/create.py index fb66b438..95c2664f 100644 --- a/src/earthkit/data/field/grib/create.py +++ b/src/earthkit/data/field/grib/create.py @@ -11,7 +11,7 @@ from earthkit.data.field.handler.data import ArrayDataFieldComponentHandler -def create_grib_field(handle, data=None, cache=False, extra_keys=None): +def create_grib_field(handle, data=None, cache=False, extra_keys=None, template_field=None): from earthkit.data.core.field import Field from earthkit.data.field.grib.data import GribData from earthkit.data.field.grib.ensemble import GribEnsemble @@ -22,19 +22,9 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): from earthkit.data.field.grib.time import GribTime from earthkit.data.field.grib.vertical import GribVertical - # from earthkit.data.specs.labels import SimpleLabels - if data is None: data = GribData(handle) - # parameter = GribParameter(handle) - # time = GribTime(handle) - # geography = 
GribGeography(handle) - # vertical = GribVertical(handle) - # labels = SimpleLabels() - # ensemble = GribEnsemble(handle) - # grib = GribLabels(handle) - time = GribTime(handle) geography = GribGeographyHandler(handle) vertical = GribVertical(handle) @@ -43,6 +33,10 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): parameter = GribParameter(handle) grib = GribMetadata(handle, extra_keys=extra_keys, cache=cache) + labels = None + if template_field is not None: + labels = template_field.labels + r = Field( data=data, parameter=parameter, @@ -51,7 +45,7 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): vertical=vertical, ensemble=ensemble, proc=proc, - # labels=labels, + labels=labels, ) r._set_private_data("metadata", grib) @@ -77,7 +71,10 @@ def _add(key, default=None): return new_field -def create_grib_field_from_buffer(buf): +def create_grib_field_from_message(buf, template_field=None): from earthkit.data.readers.grib.handle import MemoryGribHandle - return create_grib_field(MemoryGribHandle.from_message(buf), cache=False) + return create_grib_field(MemoryGribHandle.from_message(buf), template_field=template_field, cache=False) + + +create_grib_field_from_buffer = create_grib_field_from_message diff --git a/src/earthkit/data/field/grib/data.py b/src/earthkit/data/field/grib/data.py index 18914068..d331bc9b 100644 --- a/src/earthkit/data/field/grib/data.py +++ b/src/earthkit/data/field/grib/data.py @@ -28,6 +28,8 @@ def collect_keys(spec, context): class GribData(DataFieldComponentHandler): + COLLECTOR = COLLECTOR + def __init__(self, handle): self.handle = handle @@ -43,8 +45,8 @@ def get_values(self, dtype=None, copy=True, index=None): def check(self, owner): pass - def get_grib_context(self, context): - COLLECTOR.collect_keys(self, context) + # def get_grib_context(self, context): + # COLLECTOR.collect_keys(self, context) def __getstate__(self): state = {} diff --git a/src/earthkit/data/field/grib/metadata.py 
b/src/earthkit/data/field/grib/metadata.py index b31893c3..990689c7 100644 --- a/src/earthkit/data/field/grib/metadata.py +++ b/src/earthkit/data/field/grib/metadata.py @@ -31,6 +31,7 @@ def make(cache=None): return MetadataCacheHandler.make_default_cache() elif cache is not False and cache is not None: return cache + return None @staticmethod def make_default_cache(): @@ -192,7 +193,7 @@ def step_timedelta(self): v = self.get("step", None) return to_timedelta(v) - def message(self, deflate=False): + def message(self, deflate=False, owner=None): r"""Return a buffer containing the encoded message. Returns @@ -201,6 +202,14 @@ def message(self, deflate=False): """ if deflate: return self._handle.deflate().get_buffer() + elif owner is not None: + data = owner._components.get("data") + if not hasattr(data, "handle"): + data = owner.to_numpy(flatten=True) + handle = self._handle.clone() + handle.set_values(data) + return handle.get_buffer() + return self._handle.get_buffer() def as_namespace(self, namespace): @@ -231,10 +240,11 @@ def sync(self, owner): handle_new = v.handle if handle_new: - self._handle = handle_new for k, v in owner._components.items(): - if hasattr(v, "handle") and hasattr(v, "from_handle") and v.handle is not self.handle: + if hasattr(v, "handle") and hasattr(v, "from_handle") and v.handle is not handle_new: owner._components[k] = v.from_handle(handle_new) + return GribMetadata(handle_new, extra_keys=self.extra_keys, cache=True if self._cache is not None else None) + return self def get_extra_key(self, key, default=None): if self.extra_keys is not None: diff --git a/src/earthkit/data/field/handler/data.py b/src/earthkit/data/field/handler/data.py index e2507bde..f0f1adb9 100644 --- a/src/earthkit/data/field/handler/data.py +++ b/src/earthkit/data/field/handler/data.py @@ -184,7 +184,7 @@ def get_grib_context(self, context): """Get the GRIB context for the data component of the field.""" from earthkit.data.field.grib.data import COLLECTOR - 
COLLECTOR.collect_keys(self, context) + COLLECTOR.collect(self, context) @classmethod def create_empty(cls) -> "DataFieldComponentHandler": diff --git a/src/earthkit/data/targets/file_pattern.py b/src/earthkit/data/targets/file_pattern.py index 348cef04..c3a7d83c 100644 --- a/src/earthkit/data/targets/file_pattern.py +++ b/src/earthkit/data/targets/file_pattern.py @@ -88,7 +88,7 @@ def _f(self, data): def _convert(v): if v is None: return "None" - return str(v) + return v keys = [_convert(data.get(k.split(":")[0])) for k in self.split_output] path = self.filename.format(*keys) diff --git a/src/earthkit/data/utils/message.py b/src/earthkit/data/utils/message.py index 4cb55c94..b388dab5 100644 --- a/src/earthkit/data/utils/message.py +++ b/src/earthkit/data/utils/message.py @@ -198,7 +198,9 @@ def from_sample(cls, name): return cls(eccodes.codes_new_from_samples(name, cls.PRODUCT_ID), None, None) @classmethod - def _from_raw_handle(cls, handle): + def _from_raw_handle(cls, handle, clone=False): + if clone: + handle = eccodes.codes_clone(handle) return cls(handle, None, None) @classmethod @@ -213,6 +215,14 @@ def _raw_handle_from_file(cls, fp): def _raw_handle_from_message(cls, message): return eccodes.codes_new_from_message(message, cls.PRODUCT_ID) + def _raw_handle(self, clone=True): + # TODO: review if clone can be False at all. This object is managing the + # raw ecCodes handle and if clone is False, can cause issues. + if clone: + return eccodes.codes_clone(self._handle) + else: + return self._handle + # TODO: just a wrapper around the base class implementation to handle the # s,l,d qualifiers. Once these are implemented in the base class this method can # be removed. md5GridSection is also handled! 
diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index 97d8e397..3a443050 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -503,9 +503,9 @@ def _reference_field(self): try: if message: - from earthkit.data.field.grib.create import create_grib_field_from_buffer + from earthkit.data.field.grib.create import create_grib_field_from_message - return create_grib_field_from_buffer(message) + return create_grib_field_from_message(message) except Exception as e: raise ValueError( ( diff --git a/tests/encoders/test_grib_encoder.py b/tests/encoders/test_grib_encoder.py new file mode 100644 index 00000000..8837260e --- /dev/null +++ b/tests/encoders/test_grib_encoder.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import datetime + +import numpy as np +import pytest + +from earthkit.data import create_encoder, from_source +from earthkit.data.utils.testing import earthkit_examples_file + + +@pytest.mark.parametrize("_args,_kwargs", [(("",), {}), ((), {"data": ""}), ((), {"template": ""})]) +def test_grib_encoder_field_1(_args, _kwargs): + f = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist()[0] + + _args = tuple(f if v == "" else v for v in _args) + _kwargs = {k: (f if v == "" else v) for k, v in _kwargs.items()} + + encoder = create_encoder("grib") + r = encoder.encode(*_args, **_kwargs) + + assert r.to_bytes() == f.message() + + f_r = r.to_field() + assert f is not f_r + assert f.message() == f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("parameter.variable") == f_r.get("parameter.variable") + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_template_only(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == "handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + # this is the clone of the raw handle + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(**encode_kwargs) + + assert r.to_bytes() == template.message() + + f_r = 
r.to_field() + assert f_r.message() is not None + assert template.message() == f_r.message() + assert np.allclose(template.values, f_r.values) + assert f_r.get("parameter.variable") == "msl" + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_data_and_template(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == "handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert f.get("parameter.variable") == "2t" + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(data=f, **encode_kwargs) + + assert r.to_bytes() != f.message() + + f_r = r.to_field() + assert f is not f_r + assert f_r.message() is not None + assert f.message() != f_r.message() + assert f_r.message() == r.to_bytes() + assert np.allclose(f.values, f_r.values) + assert f.get("parameter.variable") == "2t" + assert f_r.get("parameter.variable") == "msl" + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_values_and_template(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + 
template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == "handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert f.get("parameter.variable") == "2t" + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(values=vals, **encode_kwargs) + + assert r.to_bytes() != f.message() + + f_r = r.to_field() + assert f is not f_r + assert f_r.message() is not None + assert f.message() != f_r.message() + assert f_r.message() == r.to_bytes() + assert np.allclose(f.values + 1.0, f_r.values) + assert f.get("parameter.variable") == "2t" + assert f_r.get("parameter.variable") == "msl" + + +def test_grib_encoder_field_data_and_values_and_template(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + template = fl[1] + + encoder = create_encoder("grib") + with pytest.raises(ValueError): + encoder.encode(data=f, values=vals, template=template) + + +@pytest.mark.parametrize("init_encoder", [None, ["template", "metadata"], ["template"], ["metadata"]]) +def test_grib_encoder_field_metadata_1(init_encoder): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder_kwargs = {} + encode_kwargs = {"template": f, "metadata": {"date": 19980502}} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = 
encoder.encode(data=f, **encode_kwargs) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 12) + + +def test_grib_encoder_field_metadata_2(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder = create_encoder("grib", metadata={"time": 0}) + r = encoder.encode(data=f, template=f, metadata={"date": 19980502}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + + +def test_grib_encoder_field_metadata_3(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + + encoder = create_encoder("grib", metadata={"time": 0}) + r = encoder.encode(values=vals, template=f, metadata={"date": 19980502}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values + 1.0, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) diff --git a/tests/grib/test_grib_message.py b/tests/grib/test_grib_message.py new file mode 100644 index 00000000..18814dfd --- /dev/null +++ b/tests/grib/test_grib_message.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + + +import numpy as np +import pytest +from grib_fixtures import ( + FL_FILE, # noqa: E402 + load_grib_data, # noqa: E402 +) + +from earthkit.data.readers.grib.handle import GribCodesHandle + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_core(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + v = f[0].message() + assert len(v) == 316 + assert v[:4] == b"GRIB" + v = f[1].message() + assert len(v) == 316 + assert v[:4] == b"GRIB" + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_change_values(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + m = f[0].message() + handle = GribCodesHandle.from_message(m) + assert handle.get("shortName") == "2t" + + # modify the values + f1 = f[0].set(values=f[0].values + 1) + m1 = f1.message() + assert m1[:4] == b"GRIB" + handle1 = GribCodesHandle.from_message(m1) + assert handle1.get("shortName") == "2t" + assert np.allclose(handle1.get_values(), f[0].values + 1) + + # the original field/handle is not modified + assert np.allclose(f[0].values, handle.get_values()) + assert np.allclose(handle.get_values(), f[0].values) + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_change_values_and_metadata(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + m = f[0].message() + handle = GribCodesHandle.from_message(m) + assert handle.get("shortName") == "2t" + + # modify the values and the metadata. The grib handle in the field + # becomes out of sync with the field metadata, and the message cannot be generated. 
+ f1 = f[0].set({"parameter.variable": "msl"}, values=f[0].values + 1) + m1 = f1.message() + assert m1 is None + + # the handle is no longer valid + assert f1._get_grib(strict=True) is None + + # the original handle is still kept, but we need to use strict=False to get + # it, as the metadata is now inconsistent with the handle + md = f1._get_grib(strict=False) + assert md is not None + assert md.get("shortName") == "2t" + assert np.allclose(md.handle.get_values(), f[0].values) diff --git a/tests/grib/test_grib_set.py b/tests/grib/test_grib_set.py index b55a1dca..ce373a19 100644 --- a/tests/grib/test_grib_set.py +++ b/tests/grib/test_grib_set.py @@ -20,20 +20,9 @@ @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) # @pytest.mark.parametrize("fl_type", ["file"]) -def test_grib_set_detailed(fl_type): +def test_grib_set_field_detailed_1(fl_type): ds_ori, _ = load_grib_data("test4.grib", fl_type) - # --------------- - # field - # --------------- - - # f = ds_ori[0].clone( - # param="q", - # levelist=_func1, - # mars_area=_func2, - # name=_func3, - # ) - f = ds_ori[0].set({ "parameter.variable": "q", "vertical.level": 600, @@ -42,28 +31,23 @@ def test_grib_set_detailed(fl_type): }) assert f.get("parameter.variable") == "q" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 600 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "q") - assert f.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) assert f.get("labels.my_shape") == (181, 360) assert f.get("labels.my_name") == "t_500" - # TODO: apply wrapped metadata to namespaces - # assert f.get(namespace="mars") == { - # "class": "ea", - # "date": 20070101, - # "domain": "g", - # 
"expver": "0001", - # "levelist": 500, - # "levtype": "pl", - # "param": "t", - # "step": 0, - # "stream": "oper", - # "time": 1200, - # "type": "an", - # } + f1 = f.sync() + assert f1.get("parameter.variable") == "q" + assert f1.get("metadata.shortName") == "q" + assert f1.get("vertical.level") == 600 + assert f1.get("metadata.levelist") == 600 + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "q") + assert f1.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f1.get("labels.my_shape") == (181, 360) + assert f1.get("labels.my_name") == "t_500" # write back to grib with temp_file() as tmp: @@ -83,9 +67,11 @@ def test_grib_set_detailed(fl_type): assert f_saved.get("vertical.level_type") == "pressure" assert f_saved.get("metadata.typeOfLevel") == "isobaricInhPa" - # --------------------- - # field - repeated use - # --------------------- + +@pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) +# @pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_field_detailed_2(fl_type): + ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set({ "parameter.variable": "q", @@ -100,20 +86,31 @@ def test_grib_set_detailed(fl_type): }) assert f.get("parameter.variable") == "pt" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 800 - assert f.get("metadata.level") == 500 - assert f.get("metadata.levelist") == 500 - # TODO: this should be 800 - # assert f.metadata("levelist") == 700 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "pt") - assert f.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) - # assert np.allclose(np.array(f.metadata("mars_area")), np.array([90.0, 0.0, -90.0, 359.0])) + assert f.get("metadata.level") is None + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "pt") + assert f.get(("parameter.variable", "metadata.date")) == 
("pt", None) assert f.get("labels.my_name") == "t_500" - # --------------- - # fieldlist - # --------------- + f1 = f.sync() + assert f1.get("parameter.variable") == "pt" + assert f1.get("metadata.shortName") == "pt" + assert f1.get("vertical.level") == 800 + assert f1.get("metadata.level") == 800 + assert f1.get("metadata.levelist") == 800 + assert f1.get("metadata.typeOfLevel") == "isobaricInhPa" + assert f1.get("vertical.level_type") == "pressure" + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "pt") + assert f1.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) + assert f1.get("labels.my_name") == "t_500" + + +@pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) +# @pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_fieldlist_detailed(fl_type): + ds_ori, _ = load_grib_data("test4.grib", fl_type) fields = [] for i in range(2): @@ -126,9 +123,9 @@ def test_grib_set_detailed(fl_type): ds = FieldList.from_fields(fields) assert ds.get("parameter.variable") == ["q", "q"] - assert ds.get("metadata.shortName") == ["t", "z"] + assert ds.get("metadata.shortName") == [None, None] assert ds.get("vertical.level") == [600, 600] - assert ds.get("metadata.levelist") == [500, 500] + assert ds.get("metadata.levelist") == [None, None] # write back to grib with temp_file() as tmp: @@ -167,11 +164,11 @@ def test_grib_set_combined(fl_type): }) assert f.get("parameter.variable") == "q" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 600 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "q") - assert f.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) assert np.allclose(f.values, vals_ori + 
1) assert np.allclose(ds_ori[0].values, vals_ori) @@ -202,11 +199,11 @@ def test_grib_set_combined(fl_type): }) assert f.get("parameter.variable") == "pt" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 800 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "pt") - assert f.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "pt") + assert f.get(("parameter.variable", "metadata.date")) == ("pt", None) assert np.allclose(f.values, vals_ori + 2) assert np.allclose(ds_ori[0].values, vals_ori) @@ -226,9 +223,9 @@ def test_grib_set_combined(fl_type): ds = FieldList.from_fields(fields) assert ds.get("parameter.variable") == ["q", "q"] - assert ds.get("metadata.shortName") == ["t", "z"] + assert ds.get("metadata.shortName") == [None, None] assert ds.get("vertical.level") == [600, 600] - assert ds.get("metadata.levelist") == [500, 500] + assert ds.get("metadata.levelist") == [None, None] assert np.allclose(ds[0].values, vals_ori + 1) assert np.allclose(ds[1].values, vals_ori + 2) @@ -419,3 +416,33 @@ def test_grib_set_no_args(fl_type): f = ds[0] r = f.set() assert r is f + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_field_sync(fl_type): + ds, _ = load_grib_data("test4.grib", fl_type) + + f = ds[0] + f = f.set({ + "parameter.variable": "q", + "vertical.level": 600, + "labels.my_shape": (181, 360), + "labels.my_name": "t_500", + }) + + assert f.get("parameter.variable") == "q" + assert f.get("metadata.shortName") is None + assert f.get("vertical.level") == 600 + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) + + f1 = f.sync() + assert 
f1.get("parameter.variable") == "q" + assert f1.get("metadata.shortName") == "q" + assert f1.get("vertical.level") == 600 + assert f1.get("metadata.levelist") == 600 + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "q") + assert f1.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f1.get("labels.my_shape") == (181, 360) + assert f1.get("labels.my_name") == "t_500" diff --git a/tests/grib/test_grib_set_data.py b/tests/grib/test_grib_set_data.py index c3efef94..dca49485 100644 --- a/tests/grib/test_grib_set_data.py +++ b/tests/grib/test_grib_set_data.py @@ -36,6 +36,13 @@ def test_grib_set_data(fl_type): assert np.allclose(f.values, vals_ori + 1) assert np.allclose(ds_ori[0].values, vals_ori) + # # the field still stores the original GRIB metadata as private metadata, + # # which is hidden but used when writing back to GRIB + # grib_md = f._get_grib() + # assert grib_md.get("shortName") == "t" + # assert grib_md.get("levelist") == 500 + # assert grib_md.get("date") == 20070101 + # write back to grib with temp_file() as tmp: f.to_target("file", tmp) @@ -62,6 +69,13 @@ def test_grib_set_data(fl_type): assert np.allclose(f.values, vals_ori + 2) assert np.allclose(ds_ori[0].values, vals_ori) + # # the field still stores the original GRIB metadata as private metadata, + # # which is hidden but used when writing back to GRIB + # grib_md = f._get_grib() + # assert grib_md.get("shortName") == "t" + # assert grib_md.get("levelist") == 500 + # assert grib_md.get("date") == 20070101 + # --------------- # fieldlist # --------------- diff --git a/tests/grib/test_grib_set_ensemble.py b/tests/grib/test_grib_set_ensemble.py index 1f7442ac..6a3c9656 100644 --- a/tests/grib/test_grib_set_ensemble.py +++ b/tests/grib/test_grib_set_ensemble.py @@ -20,7 +20,7 @@ # @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize( - "_kwargs,ref_ori, ref_set,ref_saved", + 
"_kwargs,ref_ori, ref_set,ref_grib,ref_saved", [ ( {"ensemble.member": 3}, @@ -31,8 +31,12 @@ }, { "ensemble.member": "3", - "metadata.number": 1, - "metadata.level": 850, + "metadata.number": None, + "metadata.level": None, + }, + { + "number": 1, + "level": 850, }, { "ensemble.member": "3", @@ -42,7 +46,7 @@ ), ], ) -def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_saved): +def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_grib, ref_saved): ds_ori, _ = load_grib_data("ens_50.grib", fl_type, folder="data") f = ds_ori[0].set(**_kwargs) @@ -54,6 +58,12 @@ def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_saved): for k, v in ref_ori.items(): assert ds_ori[0].get(k) == v + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in ref_grib.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist() diff --git a/tests/grib/test_grib_set_parameter.py b/tests/grib/test_grib_set_parameter.py index 517d54b9..3ef96daa 100644 --- a/tests/grib/test_grib_set_parameter.py +++ b/tests/grib/test_grib_set_parameter.py @@ -20,16 +20,21 @@ # @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize( - "_kwargs,ref1,ref2", + "_kwargs,ref1,ref_grib,ref2", [ ( {"parameter.variable": "q", "parameter.units": "kg/kg"}, { "parameter.variable": "q", - "metadata.param": "t", - "metadata.shortName": "t", + "metadata.param": None, + "metadata.shortName": None, "parameter.units": "kg/kg", - "metadata.units": "K", + "metadata.units": None, + }, + { + "param": "t", + "shortName": "t", + "units": "K", }, { "parameter.variable": "q", @@ -45,7 +50,7 @@ # ), ], ) -def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref2): +def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref_grib, 
ref2): ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set(**_kwargs) @@ -53,6 +58,12 @@ def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref2): for k, v in ref1.items(): assert f.get(k) == v + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in ref_grib.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist() @@ -69,6 +80,6 @@ def test_grib_set_parameter_2( f = ds_ori[0].set({"parameter.variable": "ta", "parameter.units": "kg/kg"}) assert f.get("parameter.variable") == "ta" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("parameter.units") == "kg/kg" - assert f.get("metadata.units") == "K" + assert f.get("metadata.units") is None diff --git a/tests/grib/test_grib_set_vertical.py b/tests/grib/test_grib_set_vertical.py index c93679da..c7a2a3e8 100644 --- a/tests/grib/test_grib_set_vertical.py +++ b/tests/grib/test_grib_set_vertical.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize("write_method", ["target"]) @pytest.mark.parametrize( - "_kwargs,ref1,ref2", + "_kwargs,ref1,grib_ref,ref2", [ ( { @@ -33,10 +33,16 @@ "vertical.level_type": "potential_temperature", "vertical.units": "K", "vertical.abbreviation": "pt", - "metadata.levelist": 500, - "metadata.level": 500, - "metadata.levtype": "pl", - "metadata.typeOfLevel": "isobaricInhPa", + "metadata.levelist": None, + "metadata.level": None, + "metadata.levtype": None, + "metadata.typeOfLevel": None, + }, + { + "levelist": 500, + "level": 500, + "levtype": "pl", + "typeOfLevel": "isobaricInhPa", }, { "vertical.level": 320, @@ -73,7 +79,7 @@ # ), ], ) -def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, ref2): +def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, grib_ref, ref2): 
ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set(**_kwargs) @@ -85,6 +91,12 @@ def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, ref2): assert ds_ori[0].get("vertical.level") == 500 assert ds_ori[0].get("vertical.level_type") == "pressure" + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in grib_ref.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist()