From d9007808f071304f45f2cf97d5c1a580bb07bf33 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 20 Apr 2026 16:53:41 +0100 Subject: [PATCH 1/8] Disable grib access in modified fields --- .../how-tos/grib/grib_modify_metadata.ipynb | 1927 +++++++++-------- .../how-tos/grib/grib_modify_values.ipynb | 107 +- docs/source/how-tos/target/grib_encoder.ipynb | 27 +- docs/source/skip_api_rules.py | 5 +- src/earthkit/data/core/field.py | 215 +- src/earthkit/data/encoders/grib.py | 172 +- src/earthkit/data/field/grib/create.py | 2 +- src/earthkit/data/field/grib/data.py | 6 +- src/earthkit/data/field/grib/metadata.py | 16 +- src/earthkit/data/field/handler/data.py | 2 +- src/earthkit/data/targets/file_pattern.py | 9 +- src/earthkit/data/xr_engine/engine.py | 4 +- tests/grib/test_grib_message.py | 75 + tests/grib/test_grib_set.py | 42 +- tests/grib/test_grib_set_data.py | 14 + tests/grib/test_grib_set_ensemble.py | 18 +- tests/grib/test_grib_set_parameter.py | 25 +- tests/grib/test_grib_set_vertical.py | 24 +- 18 files changed, 1573 insertions(+), 1117 deletions(-) create mode 100644 tests/grib/test_grib_message.py diff --git a/docs/source/how-tos/grib/grib_modify_metadata.ipynb b/docs/source/how-tos/grib/grib_modify_metadata.ipynb index a6483e97d..4de20e4b2 100644 --- a/docs/source/how-tos/grib/grib_modify_metadata.ipynb +++ b/docs/source/how-tos/grib/grib_modify_metadata.ipynb @@ -1,927 +1,1006 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "5d21c14f-b851-4437-a1f1-1daac37a9e49", - "metadata": {}, - "source": [ - "# GRIB: modifying metadata" - ] - }, - { - "cell_type": "markdown", - "id": "ac126208-e64e-4658-b69a-83e9b387464b", - "metadata": {}, - "source": [ - "This notebook demonstrates how to modify the metadata in GRIB fields.\n", - "\n", - "First we read some GRIB data containing pressure level fields." 
- ] - }, - { - "cell_type": "markdown", - "id": "0edd73c7-9358-47cd-96b4-cf1e0d1cb720", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "c11cc522-d388-4f39-a2d4-2f7b4c03517e", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "01b91013e30e499da1ed373f75523982", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f = fl[0]\n", + "f.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "d3f4f433-35f4-40b0-a100-d405205dd1bc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Using set()" + ] + }, + { + "cell_type": "raw", + "id": "efd58137-2499-4929-86f0-211444c3a152", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "A field can be modified by using :py:meth:`~earthkit.data.core.field.Field.set`. It will create a new field with updated metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "784a62cc-4534-44e4-af1a-5475ab82afdb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"parameter.variable\": \"u\", \"parameter.units\": \"m/s\", \"vertical.level\": 500})\n", + "f1.ls()" + ] + }, + { + "cell_type": "raw", + "id": "296ef80e-510f-482c-bba3-99f353b5d4b5", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. since the field was created from GRIB data it has the raw (GRIB) metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "799fcb48-8860-442c-86dc-16054146ef19", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t\n", + "'Key metadata.shortName cannot be set on the field.'\n" + ] + } + ], + "source": [ + "print(f.get(\"metadata.shortName\"))\n", + "try:\n", + " f.set({\"metadata.shortName\": \"u\"})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "bbeceb32-7cde-462c-8af1-36c10302530c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Setting time" + ] + }, + { + "cell_type": "markdown", + "id": "fb770237-20f6-453f-9fd6-6bcedea06a0d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Setting keys for the time field component allows using multiple formats. By default a \"datetime\" key takes a datetime.datetime object and a \"step\" key takes a datetime.timedelta object." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "77201dea-07e3-4e44-93d6-3220f249ea83", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": datetime.datetime(2000, 12, 18, 12), \"time.step\": datetime.timedelta(hours=6)})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e95e5812-d436-4d1f-9f5d-1c0e962cfa82", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "On top of that, we can also use many compatible formats, e.g:\n", + " - for datetime: ISO date strings, numpy datetime64 values, integers as yyyymmdd (the hour is assumed to be 0 in this case)\n", + " - for timedelta: integers (as hours), strings like \"6s\", \"6m\", \"6h\" (for seconds, minutes or hours)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "30a2c42b-56b9-42a4-ac22-f78d0bfe6c44", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": \"2000-12-18T12\", \"time.step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "82e0d38a-b766-46f3-b55f-68406dbb26d1", + "metadata": {}, + "source": [ + "Setting the step will automatically update the valid time too." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b384219d-0d52-4753-801e-39c6879754da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:102018-08-01 12:00:000 days 00:00:101000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:10 2018-08-01 12:00:00 0 days 00:00:10 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.step\": \"10s\"})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "cd3279a4-0a10-4ad7-abd1-864d1cabfe6c", + "metadata": {}, + "source": [ + "## Setting components" + ] + }, + { + "cell_type": "raw", + "id": "bcf1b39a-f7e3-4d62-8a52-92e8cc3aca8a", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "It is allowed to set individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e05d2d32-6e0a-41b7-bc6f-889b01856884", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set(time={\"base_datetime\": \"2000-12-18T12\", \"step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "c43eb34e-a058-43bc-bd99-0b1b56ed0dd3", + "metadata": {}, + "source": [ + "If the dict does not fully specify the component an exception is raised. E.g. \"step\" on its own does not define a time component." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "758a4ee9-23a9-45ce-91d7-9af9d1c6a930", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot create ForecastTime from keys: ['step'].\n" + ] + } + ], + "source": [ + "try:\n", + " f.set(time={\"step\": 6})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "90ba2625-e203-4e09-a072-4dcdc438819b", + "metadata": {}, + "source": [ + "## Saving the modified field to disk" + ] + }, + { + "cell_type": "markdown", + "id": "4926dcde-b9ef-47b1-8c51-fb140e72a015", + "metadata": {}, + "source": [ + "We change the level and save the modified field into a GRIB file." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f01193d1-cfbb-42fb-8f73-b11ab0b1737f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.to_target(\"file\", \"_res_lev.grib\")\n", + "\n", + "# read back the data and compare the values in the first field\n", + "f1_w = ekd.from_source(\"file\", \"_res_lev.grib\").to_fieldlist()\n", + "f1_w.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", + "metadata": {}, + "source": [ + "## Modified fields and the associated GRIB message" + ] + }, + { + "cell_type": "raw", + "id": "56528846-a9e1-43a8-a299-e33b64aabd55", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field was created from GRIB data the associated GRIB message can be accessed via the field with :func:`~earthkit.data.field.Field.message`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e403ad78-821d-4b14-8cae-4b2afef55093", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "b'GRIB\\x00\\x00\\x96\\x01\\x00\\x00'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f.message()[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "d9aa4f53-01e1-46de-a0c1-ebb87970cf38", + "metadata": {}, + "source": [ + "Having modified the field metadata this GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8cddbcda-ea5c-4482-baa7-3535f94752a6", + "metadata": {}, + "outputs": [], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.message()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8750f04b-b106-493b-96ad-2c02c413137d", + "metadata": {}, + "outputs": [], + "source": [ + "f1.get(\"metadata.shortName\")" + ] + }, + { + "cell_type": "markdown", + "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", + "metadata": {}, + "source": [ + "## Changing raw GRIB metadata" + ] + }, + { + "cell_type": "raw", + "id": "89482515-0cf7-44ce-a267-c78e8bd0a6a6", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Currently, changing the (raw) GRIB metadata in a field requires the usage of a :py:class:`~earthkit.data.encoders.grib.GribEncoder`. \n", + "When we call its :py:meth:`~earthkit.data.encoders.grib.GribEncoder.encode` method it will clone the underlying GRIB message, set the GRIB metadata on it and return an object that can be converted to a field." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "82e780fe-3539-4af9-ad4f-585739577dcc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "encoder = ekd.create_encoder(\"grib\")\n", + "r = encoder.encode(template=f, metadata={\"shortName\": \"u\"})\n", + "f1 = r.to_field()\n", + "f1.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60b11297-7499-445d-9b71-2d59709aed25", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } }, - "tags": [] - }, - "source": [ - "We will use the first field in the rest of the notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d1db47ab-8918-4112-af9e-f1a02a61eb37", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f = fl[0]\n", - "f.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "d3f4f433-35f4-40b0-a100-d405205dd1bc", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## Using set()" - ] - }, - { - "cell_type": "raw", - "id": "efd58137-2499-4929-86f0-211444c3a152", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "A field can be modified by using :py:meth:`~earthkit.data.core.field.Field.set`. It will create a new field with updated metadata." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "784a62cc-4534-44e4-af1a-5475ab82afdb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 500 pressure 0 regular_ll " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"parameter.variable\": \"u\", \"parameter.units\": \"m/s\", \"vertical.level\": 500})\n", - "f1.ls()" - ] - }, - { - "cell_type": "raw", - "id": "296ef80e-510f-482c-bba3-99f353b5d4b5", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. the field was created from GRIB data so it has the raw metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "799fcb48-8860-442c-86dc-16054146ef19", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "t\n", - "'Key metadata.shortName cannot be set on the field.'\n" - ] - } - ], - "source": [ - "print(f.get(\"metadata.shortName\"))\n", - "try:\n", - " f.set({\"metadata.shortName\": \"u\"})\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "bbeceb32-7cde-462c-8af1-36c10302530c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## Setting time" - ] - }, - { - "cell_type": "markdown", - "id": "fb770237-20f6-453f-9fd6-6bcedea06a0d", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Setting keys for the time field component allows using multiple formats. By default a \"datetime\" key takes a datatime.datetime object and a \"step\" key takes a datatime.timedelta object." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "77201dea-07e3-4e44-93d6-3220f249ea83", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.base_datetime\": datetime.datetime(2000, 12, 18, 12), \"time.step\": datetime.timedelta(hours=6)})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "e95e5812-d436-4d1f-9f5d-1c0e962cfa82", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "On top of that, we can also use many compatible formats, e.g:\n", - " - for datetime: ISO date strings, numpy datetime64 values, integers as yyyymmdd (the hour is assumed to be 0 in this case)\n", - " - for timedelta: integers (as hours), strings like \"6s\", \"6m\", \"6h\" (for seconds, minutes or hours)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "30a2c42b-56b9-42a4-ac22-f78d0bfe6c44", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.base_datetime\": \"2000-12-18T12\", \"time.step\": 6})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "82e0d38a-b766-46f3-b55f-68406dbb26d1", - "metadata": {}, - "source": [ - "Setting the step will automatically update the a valid time too." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b384219d-0d52-4753-801e-39c6879754da", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:102018-08-01 12:00:000 days 00:00:101000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:10 2018-08-01 12:00:00 0 days 00:00:10 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.step\": \"10s\"})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "cd3279a4-0a10-4ad7-abd1-864d1cabfe6c", - "metadata": {}, - "source": [ - "## Setting components" - ] - }, - { - "cell_type": "raw", - "id": "bcf1b39a-f7e3-4d62-8a52-92e8cc3aca8a", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "It is allowed to set individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e05d2d32-6e0a-41b7-bc6f-889b01856884", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set(time={\"base_datetime\": \"2000-12-18T12\", \"step\": 6})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "c43eb34e-a058-43bc-bd99-0b1b56ed0dd3", - "metadata": {}, - "source": [ - "If the dict is not fully specifying the component an exception is raised. E.g. \"step\" on it is own does not define a time component." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "758a4ee9-23a9-45ce-91d7-9af9d1c6a930", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cannot create ForecastTime from keys: ['step'].\n" - ] - } - ], - "source": [ - "try:\n", - " f.set(time={\"step\": 6})\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "90ba2625-e203-4e09-a072-4dcdc438819b", - "metadata": {}, - "source": [ - "## Saving the modified field to disk" - ] - }, - { - "cell_type": "markdown", - "id": "4926dcde-b9ef-47b1-8c51-fb140e72a015", - "metadata": {}, - "source": [ - "We change the level and save the modified field into a GRIB file." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f01193d1-cfbb-42fb-8f73-b11ab0b1737f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 500 pressure 0 regular_ll " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"vertical.level\": 500})\n", - "f1.to_target(\"file\", \"_res_lev.grib\")\n", - "\n", - "# read back the data and compare the values in the first field\n", - "f1_w = ekd.from_source(\"file\", \"_res_lev.grib\").to_fieldlist()\n", - "f1_w.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", - "metadata": {}, - "source": [ - "## Changing raw GRIB metadata" - ] - }, - { - "cell_type": "raw", - "id": "89482515-0cf7-44ce-a267-c78e8bd0a6a6", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Currently, changing the (raw) GRIB metadata in a field requires the usage of a :py:class:`~earthkit.data.encoders.grib.GribEncoder`. \n", - "When we call its :py:meth:`~earthkit.data.encoders.grib.GribEncoder.encode` method it will clone the underlying GRIB message, set the GRIB metadata on it and return an object that can be converted to a field." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "82e780fe-3539-4af9-ad4f-585739577dcc", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "encoder = ekd.create_encoder(\"grib\")\n", - "r = encoder.encode(template=f, metadata={\"shortName\": \"u\"})\n", - "f1 = r.to_field()\n", - "f1.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/source/how-tos/grib/grib_modify_values.ipynb b/docs/source/how-tos/grib/grib_modify_values.ipynb index 2d49be611..5e444ae3f 100644 --- a/docs/source/how-tos/grib/grib_modify_values.ipynb +++ b/docs/source/how-tos/grib/grib_modify_values.ipynb @@ -39,7 +39,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e39683874919433bbac0194d1c67e7b5", + "model_id": "3e2a5e8d5fec4ecf8ffa4b5319a2df21", "version_major": 2, "version_minor": 0 }, @@ -462,6 +462,111 @@ "t1_w = ekd.from_source(\"file\", \"_res_3.grib\").to_fieldlist()\n", "print(t[0].values.max(), t1_w[0].values.max())" ] + }, + { + "cell_type": "markdown", + "id": "09e4a417-a3be-42a0-977a-2053ab67c3e9", + "metadata": {}, + "source": [ + "## Modified fields and the associated GRIB message" + ] + }, + { + "cell_type": "raw", + "id": 
"5054e101-5208-4a8a-945b-67c048372c05", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field is created from GRIB data the associated GRIB message can be accessed via the field using :func:`~earthkit.data.field.Field.message`. Having modified the field values this GRIB message is still available and the data that :func:`~earthkit.data.field.Field.message` returns will contain the updated values.\n", + "\n", + "The following example demonstrates this by creating new fields from the messages in the original and modified fields and comparing their values." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5c405417-c51c-4fa3-8c37-847c54e36163", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(np.float64(272.5641784667969), np.float64(273.5641784667969))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from earthkit.data.field.grib.create import create_grib_field_from_message\n", + "\n", + "f = fl[0]\n", + "f1 = f.set(values=f.values + 1)\n", + "\n", + "f_m = create_grib_field_from_message(f.message())\n", + "f1_m = create_grib_field_from_message(f1.message())\n", + "\n", + "f_m.values[0], f1_m.values[1]" + ] + }, + { + "cell_type": "markdown", + "id": "3c3ca0c5-fb30-4134-86f5-e90c12f4960d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If the metadata is also modified the associated GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a29ae7ac-4617-48c8-9176-98ad89c1ceec", + "metadata": {}, + "outputs": [], + "source": [ + "f = fl[0]\n", + "f1 = f.set({\"values\": f.values + 1, \"vertical.level\": 850})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b21a3608-693d-43ac-b851-ebd9998306c6", + "metadata": {}, + "outputs": [], + "source": [ + "f1.message()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "74cb432d-797a-4685-9415-fa2583f67e4e", + "metadata": {}, + "outputs": [], + "source": [ + "f1.get(\"metadata.shortName\")" + ] } ], "metadata": { diff --git a/docs/source/how-tos/target/grib_encoder.ipynb b/docs/source/how-tos/target/grib_encoder.ipynb index 590d8e460..9a092363d 100644 --- a/docs/source/how-tos/target/grib_encoder.ipynb +++ b/docs/source/how-tos/target/grib_encoder.ipynb @@ -60,7 +60,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a2c2ac767de74fdb8f218c7243f31b2c", + "model_id": "4fa9e25c0fba4cc6bdc438143e62e78b", "version_major": 2, "version_minor": 0 }, @@ -152,7 +152,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -178,7 +178,7 @@ "tags": [] }, "source": [ - "The method to call is :meth:`GribEncoder.encode`. The ``template`` argument can be a GRIB field. In the example below :meth:`GribEncoder.encode` will simply create a copy (clone) of the underlying GRIB handle in the field." + "The method to call is :meth:`~earthkit.data.encoders.grib.GribEncoder.encode`. The ``template`` argument can be a GRIB field. In the example below :meth:`~earthkit.data.encoders.grib.GribEncoder.encode` will simply create a copy (clone) of the underlying GRIB handle in the field." 
] }, { @@ -196,7 +196,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -247,6 +247,7 @@ } ], "source": [ + "# get message as bytes\n", "r.to_bytes()[:10]" ] }, @@ -323,6 +324,7 @@ } ], "source": [ + "# convert to field\n", "f = r.to_field()\n", "f.ls()" ] @@ -334,6 +336,7 @@ "metadata": {}, "outputs": [], "source": [ + "# write into a file\n", "with open(\"_res_encoded.grib\", \"wb\") as out:\n", " r.to_file(out)" ] @@ -924,7 +927,7 @@ { "data": { "text/plain": [ - "(np.float64(4.1302121976172), np.float64(4.130106449127197))" + "(np.float64(4.270901324854242), np.float64(4.270981311798096))" ] }, "execution_count": 15, @@ -1331,20 +1334,6 @@ "for d in encoder.encode(data=ds, step=18):\n", " print(d.to_field().ls(keys=[\"metadata.shortName\", \"metadata.step\"]))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd24efbb-9c27-47a0-870e-43e1e21b5cc3", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/skip_api_rules.py b/docs/source/skip_api_rules.py index f3b5743d9..a8ed88b7b 100644 --- a/docs/source/skip_api_rules.py +++ b/docs/source/skip_api_rules.py @@ -27,6 +27,7 @@ def _ends_with_any(name, suffixes): "sources.SourceMaker", ], "data": [ + "encoders.grib.encoder", "sources.get_source", ], "method": [ @@ -43,7 +44,9 @@ def _skip_api_items(app, what, name, obj, skip, options): # if "ArrayLike" in name: # print(f"Skipping {what} {name} {obj}") - if what == "data" and ".ArrayLike" in name: + if name.endswith(".LOG"): + skip = True + elif what == "data" and ".ArrayLike" in name: skip = True else: s = _SKIP.get(what, []) diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index e7208a161..e4c163c6d 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -349,6 +349,55 @@ def from_field( v.sync(r) return r + def _from_set( + self, + data=None, + 
time=None, + parameter=None, + geography=None, + vertical=None, + ensemble=None, + proc=None, + labels=None, + ): + _kwargs = { + _DATA: data, + _TIME: time, + _PARAMETER: parameter, + _GEOGRAPHY: geography, + _VERTICAL: vertical, + _ENSEMBLE: ensemble, + _PROC: proc, + _LABELS: labels, + } + + has_new_metadata = False + for name in Field._COMPONENT_NAMES: + v = _kwargs[name] + if v is not None: + _kwargs[name] = _COMPONENT_MAKER.default_cls(name).from_any(v) + if name != "data" and name != "labels": + has_new_metadata = True + else: + _kwargs[name] = self._components[name] + + r = self.__class__(**_kwargs) + + # copy private data and initialize + if self._private: + r._private = self._private.copy() + if has_new_metadata: + for k in list(r._private.keys()): + v = r._private[k] + if not k.startswith("_"): + r._private.pop(k) + k = f"_{k}" + if hasattr(v, "sync"): + v = v.sync(r) + r._private[k] = v + + return r + @classmethod def from_dict(cls, d): r"""Create a Field from a dictionary. @@ -826,25 +875,26 @@ def _get_single(self, key, default=None, astype=None, raise_on_missing=False): if component: return component.get(key_name, default=default, astype=astype, raise_on_missing=raise_on_missing) elif component_name == _METADATA: - for _, private_component in self._private.items(): - if hasattr(private_component, "metadata"): - return private_component.metadata( - key_name, default=default, astype=astype, raise_on_missing=raise_on_missing - ) - else: + for p_name, p_component in self._private.items(): + if not p_name.startswith("_"): + if hasattr(p_component, "metadata"): + return p_component.metadata( + key_name, default=default, astype=astype, raise_on_missing=raise_on_missing + ) + else: - def _cast(v): - if callable(astype): - try: - return astype(v) - except Exception: - return None - return v + def _cast(v): + if callable(astype): + try: + return astype(v) + except Exception: + return None + return v - # TODO: review this - v = self.private_component.get(key) - 
if v is not None: - return _cast(v) + # TODO: review this + v = p_component.get(key) + if v is not None: + return _cast(v) if raise_on_missing: raise KeyError(f"Key {key} not found in field") @@ -1267,7 +1317,7 @@ def set(self, *args, **kwargs): >>> field.set({"parameter.variable": "t"}) >>> field.set({"parameter.variable": "t", "vertical.level": 1000}) - New data values can be set by using the "data" or "values" key with the new values + New data values can be set by using the "data" or "values" key with the new values as a value. For example, >>> field.set(data=new_values_array) @@ -1314,6 +1364,42 @@ def set(self, *args, **kwargs): Field A new field with the specified metadata keys set to the given values. + + Notes + ----- + When the field was created from a GRIB message, calling :meth:`set` does not modify the original + GRIB message and the new field returned by :meth:`set` is not linked to a GRIB message. In the new field + the GRIB message/handle will not be available and the GRIB specific keys in the raw metadata will not be + accessible. + + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set({"parameter.variable": "msl", "parameter.units": "Pa"}) + >>> f1.get("metadata.shortName") + None + >>> f1.metadata("shortName") + KeyError: 'metadata.shortName' not found in field + >>> f1.message() + None + + However, if only the "data" or "values" key is used in :meth:`set` to set new data values, the new + field returned by :meth:`set` is still linked to the original GRIB message and the GRIB specific keys + in the raw metadata are still accessible. When calling :meth:`message` on the new field, the original GRIB + message with the modified data values is returned. 
+ + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set(values=f.values + 1) + >>> f1.get("metadata.shortName") + "2t" + >>> f1.metadata("shortName") + "2t" + >>> f1.message() + + + Examples -------- See the how-to examples for the :meth:`set` method in the following notebook: @@ -1372,7 +1458,7 @@ def set(self, *args, **kwargs): _components[component_name] = s if _components: - return Field.from_field(self, **_components) + return self._from_set(**_components) elif kwargs: raise ValueError("No valid keys to set in the field.") @@ -1689,15 +1775,19 @@ def tail(self, *args, **kwargs): return self.ls("tail", *args, **kwargs) def message(self): - r"""Return a buffer containing the encoded message for Fields generated from a message based format (e.g. GRIB). + r"""Return a buffer containing the encoded message associated with the field. + + Only available for fields generated from a message based format (e.g. GRIB). + Once the field metadata is modified by calling :meth:`set` the link to the original + message is lost and this method will return None. 
Returns ------- bytes """ - grib = self._get_grib() + grib = self._get_grib(strict=True) if grib is not None: - return grib.message() + return grib.message(owner=self) return None def _dispatch_to_fieldlist_method(self, method_name, *args, **kwargs): @@ -1710,10 +1800,15 @@ def _set_private_data(self, name, data): def _get_private_data(self, name): return self._private.get(name) - def _get_grib(self): - if self._private and "metadata" in self._private and getattr(self._private["metadata"], "NAME", None) == _GRIB: - return self._private["metadata"] - + def _get_grib(self, strict=False): + r"""Return the GRIB metadata object associated with the field if available.""" + if self._private: + names = ["metadata", "_metadata"] if not strict else ["metadata"] + for name in names: + if name in self._private: + md = self._private[name] + if getattr(md, "NAME", None) == _GRIB: + return md return None def _check(self): @@ -1744,74 +1839,6 @@ def _normalise_key_values(**kwargs): r"""Normalise the selection input for :meth:`FieldList.sel`.""" return kwargs - # def sel(self, *args, **kwargs): - # r"""Check if a field matches the given selection criteria. - - # Parameters - # ---------- - # *args: tuple - # Positional arguments specifying the filter conditions as a dict. - # Both single or multiple keys are allowed to use. When multiple filter conditions - # are specified, they are combined with a logical AND operator. Each metadata key in - # the filter conditions can specify the following type of filter values: - - # - single value:: - - # f.sel({parameter.variable: "t"}) - - # - list of values:: - - # f.sel({parameter.variable: ["u", "v"]}) - - # - slice of values (defines a closed interval, so treated as inclusive of both the start - # and stop values, unlike normal Python indexing). 
The following example filters the fields - # with "vertical.level" between 300 and 500 inclusively:: - - # f.sel({vertical.level: slice(300, 500)}) - - # Date and time related keys from the "time" field component are automatically normalised - # for comparison. This is also applied to the following keys from the - # raw metadata: "metadata.base_datetime", "metadata.valid_datetime" and "metadata.step_timedelta". - - # For example, when filtering by "time.valid_datetime" the following calls are equivalent: - - # >>> f.sel({ "time.valid_datetime": "2018-08-01T12:00:00"}) - # >>> f.sel({ "time.valid_datetime": "2018080112"}) - # >>> f.sel({ "time.valid_datetime": 2018080112}) - # >>> f.sel({ "time.valid_datetime": datetime(2018, 8, 1, 12, 0) }) - - # Similarly, when filtering by "time.step" the following calls are equivalent (values are assumed - # to be in hours when the unit is not specified): - - # >>> f.sel({ "time.step": "6h"}) - # >>> f.sel({ "time.step": 6}) - # >>> f.sel({ "time.step": "360m"}) - # >>> f.sel({ "time.step": timedelta(hours=6)}) - - # remapping: dict - # Define new metadata keys from existing ones to use in ``*args`` and ``**kwargs``. - # E.g. to define a new key "param_level" as the concatenated value of - # the "parameter.variable" and "vertical.level" keys use:: - - # >>> remapping={"param_level": "{parameter.variable}{vertical.level}"} - - # **kwargs: dict, optional - # Other keyword arguments specifying the filter conditions. - - # Returns - # ------- - # Field or None - # Returns the field itself if it matches the selection criteria, otherwise returns None. 
- # """ - # res = self._dispatch_to_fieldlist_method("sel", *args, **kwargs) - - # if res and len(res) == 1: - # return self - # return None - - # def order_by(self, *args, **kwargs): - # pass - def _unary_op(self, oper): v = oper(self.values) r = self._set_values(v) diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index 60d093e5f..7608a370d 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -19,38 +19,72 @@ LOG = logging.getLogger(__name__) -NOT_IN_EDITION_1 = ( +_NOT_IN_EDITION_1 = ( "productDefinitionTemplateNumber", "typeOfGeneratingProcess", ) -COMPULSORY = (("date", "referenceDate"), ("param", "paramId", "shortName")) +_COMPULSORY = (("date", "referenceDate"), ("param", "paramId", "shortName")) class GribEncodedData(EncodedData): + """The object representing the encoded GRIB message.""" + def __init__(self, handle): + """Initialize the GribEncodedData object. + + Parameters + ---------- + handle: GribCodesHandle + The handle to the GRIB message. + """ self.handle = handle def to_bytes(self): + """Return the GRIB message as bytes.""" return self.handle.get_buffer() def to_file(self, f): + """Write the GRIB message to a file. + + Parameters + ---------- + f: file-like object + The file-like object to write the GRIB message to. + + """ self.handle.write(f) def get(self, key, default=None): - if key: + """Get a value from the GRIB message metadata. + + Parameters + ---------- + key: str + The key of the metadata to retrieve. + default: any, optional + The default value to return if the key is not found. + + Returns + ------- + The value associated with the key, or the default value if the key is not found. 
+ """ + if key.startswith("metadata."): + return self.handle.get(key[9:], default=default) + elif key: return self.to_field().get(key, default=default) else: raise NotImplementedError def to_field(self): + """Convert the GRIB message to a Field object.""" # from earthkit.data.readers.grib.memory import GribFieldInMemory # return GribFieldInMemory.from_buffer(self.to_bytes()) - from earthkit.data.field.grib.create import create_grib_field_from_buffer + from earthkit.data.field.grib.create import create_grib_field_from_message - return create_grib_field_from_buffer(self.to_bytes()) + return create_grib_field_from_message(self.to_bytes()) class Combined: @@ -107,7 +141,7 @@ class GribHandleMaker: """Create a new GribCodesHandle from a template, field or metadata.""" def __init__(self, template=None): - self.template = template + self.template = self.handle_from_template(template, clone=False) self._bbox = {} def make(self, values=None, metadata=None, template=None): @@ -124,7 +158,10 @@ def make(self, values=None, metadata=None, template=None): template: GribCoder A template to use for encoding """ - handle = self.handle_from_template(template) + if template is None: + template = self.template + + handle = self.handle_from_template(template, clone=True) if handle is not None: self.update_metadata_from_template(metadata, template, handle) @@ -136,28 +173,55 @@ def make(self, values=None, metadata=None, template=None): if handle is None: if values is None: raise ValueError("No values to encode") - handle = self.handle_from_metadata(values, metadata, COMPULSORY) + handle = self.handle_from_metadata(values, metadata, _COMPULSORY) return handle - def handle_from_template(self, template): + @staticmethod + def handle_from_template(template, clone=True): handle = None - if template is None: - template = self.template - if template is not None: from earthkit.data.core.field import Field + def _result(handle): + return handle.clone() if clone else handle + if 
isinstance(template, Field): - return self.handle_from_field(template) + return GribHandleMaker.handle_from_field(template) + # GribMetadata or GribHandle elif hasattr(template, "handle"): handle = template.handle if handle is not None: - return handle.clone() + return _result(handle) + else: + from earthkit.data.readers.grib.handle import GribCodesHandle + + if isinstance(template, GribCodesHandle): + return _result(template) + + # message buffer as bytes + elif isinstance(template, bytes): + handle = GribCodesHandle.from_message(template) + if handle is not None: + return _result(handle) + # GRIB sample as string + elif isinstance(handle, str): + handle = GribCodesHandle.from_sample(template) + if handle is not None: + return _result(handle) + # raw ecCodes handle + else: + try: + handle = GribCodesHandle._from_raw_handle(template) + if handle is not None: + return _result(handle) + except Exception: + pass return None - def handle_from_field(self, field): + @staticmethod + def handle_from_field(field): r = {} field._get_grib_context(r) handle = r.pop("handle", None) @@ -347,6 +411,9 @@ class GribEncoder(Encoder): def __init__(self, **kwargs): super().__init__(**kwargs) self._bbox = {} + # the template is stored as a handle to be used as a basis for encoding, + # (when available) + self.template = GribHandleMaker.handle_from_template(self.template, clone=False) @normalise_grib_keys @normalise("date", "date") @@ -376,24 +443,68 @@ def encode( **kwargs, ): """ + Encode new GRIB message(s). + Parameters ---------- - data: Field - The data to encode - values: numpy.ndarray - The values to encode + data: Field, FieldList, Numpy array, or None + The data to encode. Can be a :py:class:`~earthkit.data.core.field.Field`, a + :py:class:`~earthkit.data.core.fieldlist.FieldList`, a Numpy array, or None. + If None, the new GRIB message(s) will be created from the ``values``, ``metadata`` and + ``template``. Cannot be specified together with ``values`` and ``template``. 
+ When ``template`` is also provided, it will be used as a basis for encoding, + but the values will be taken from ``data``. + values: numpy.ndarray or None + The values to encode. If None, the values will be taken from the ``data`` or + ``template``. Takes precedence over the values in ``data`` or ``template`` + if any of them are provided. If the values contain NaNs, they will be replaced with the + ``missing_value`` provided. Cannot be specified together with ``data`` and ``template``. check_nans: bool - Check for NaNs in the values and replace them with missing_value + Check for NaNs in the values and replace them with ``missing_value``. metadata: dict - Metadata to encode - template: GribCoder - A template to use for encoding - return_bytes: bool - Return the encoded message as bytes + Metadata to encode. The keys must be ecCodes GRIB keys, optionally prefixed with "metadata.". + template: Field, GribCodesHandle, bytes, str, int, None + A template to use for encoding. It can be a :py:class:`~earthkit.data.core.field.Field`, + a :py:class:`~earthkit.data.readers.grib.handle.GribCodesHandle`, a GRIB message as + bytes, an ecCodes GRIB sample name as string, a raw ecCodes handle as an integer, or None. If None, + the :obj:`GribEncoder.template` will be used if provided. Otherwise a new handle will be + created from the ``data``, ``values`` and ``metadata``. Takes precedence over the + ``data`` in forming the new GRIB message, but values are taken from the ``data`` if not + provided directly. Cannot be specified together with ``data`` and ``values``. missing_value: float - The value to use for NaNs + The value to use for NaNs. Default is 9999, which is the default missing value used by ecCodes + when encoding with a template that does not have a valid "bitsPerValue" key. kwargs: dict - Additional metadata to encode + Additional metadata to encode. 
+ + Returns + ------- + :obj:`GribEncodedData` or generator of :obj:`GribEncodedData` + The object representing the encoded GRIB message(s). When a single GRIB message is encoded, + a :obj:`GribEncodedData` object is returned. When multiple GRIB messages are encoded, a generator + of :obj:`GribEncodedData` objects is returned that can be processed in a loop. + + Notes + ----- + ``data``, ``values`` and ``template`` cannot be specified together. If more than one of them + are provided, the following rules apply: + + - ``data``, ``values``: The values will be taken from the ``values`` argument. + - ``data``, ``template``: The ``template`` will be used as a basis for encoding, but + the values will be taken from the ``data`` argument. + - ``values``, ``template``: The ``template`` will be used as a basis for encoding, but + the values will be taken from the ``values`` argument. + + When no ``data`` and ``template`` are provided, a new GRIB message will be created from the + ``values`` and ``metadata``. This is an experimental feature and only works for certain metadata + keys and the grid has to be either global lat-lon or reduced Gaussian grid. The geography is + inferred from the shape of the specified ``values``. + + Examples + -------- + See the how-to examples for more details and examples of encoding GRIB data with :class:`GribEncoder`. 
+ + - :ref:`howtos/target/grib_encoder.ipynb` """ if template is None: template = self.template @@ -462,6 +573,7 @@ def _encode_field(self, field, *, target=None, values=None, template=None, metad r = {} field._get_grib_context(r) + handle = r.pop("handle", None) field_values = r.pop("values", None) @@ -494,7 +606,7 @@ def _encode_field(self, field, *, target=None, values=None, template=None, metad if values is None: values = field_values - if values is None and template: + if values is None and template is None: values = field.values if template is None: @@ -538,7 +650,7 @@ def _make_message( if metadata is None: metadata = {} - compulsory = COMPULSORY + compulsory = _COMPULSORY self._update_metadata(handle, metadata, compulsory, can_infer_time) @@ -559,7 +671,7 @@ def _make_message( metadata["bitmapPresent"] = 1 if str(metadata.get("edition")) == "1": - for k in NOT_IN_EDITION_1: + for k in _NOT_IN_EDITION_1: metadata.pop(k, None) if int(metadata.get("deleteLocalDefinition", 0)): diff --git a/src/earthkit/data/field/grib/create.py b/src/earthkit/data/field/grib/create.py index fb66b4382..34f5dd2d5 100644 --- a/src/earthkit/data/field/grib/create.py +++ b/src/earthkit/data/field/grib/create.py @@ -77,7 +77,7 @@ def _add(key, default=None): return new_field -def create_grib_field_from_buffer(buf): +def create_grib_field_from_message(buf): from earthkit.data.readers.grib.handle import MemoryGribHandle return create_grib_field(MemoryGribHandle.from_message(buf), cache=False) diff --git a/src/earthkit/data/field/grib/data.py b/src/earthkit/data/field/grib/data.py index 189140686..d331bc9b5 100644 --- a/src/earthkit/data/field/grib/data.py +++ b/src/earthkit/data/field/grib/data.py @@ -28,6 +28,8 @@ def collect_keys(spec, context): class GribData(DataFieldComponentHandler): + COLLECTOR = COLLECTOR + def __init__(self, handle): self.handle = handle @@ -43,8 +45,8 @@ def get_values(self, dtype=None, copy=True, index=None): def check(self, owner): pass - def 
get_grib_context(self, context): - COLLECTOR.collect_keys(self, context) + # def get_grib_context(self, context): + # COLLECTOR.collect_keys(self, context) def __getstate__(self): state = {} diff --git a/src/earthkit/data/field/grib/metadata.py b/src/earthkit/data/field/grib/metadata.py index b31893c36..990689c72 100644 --- a/src/earthkit/data/field/grib/metadata.py +++ b/src/earthkit/data/field/grib/metadata.py @@ -31,6 +31,7 @@ def make(cache=None): return MetadataCacheHandler.make_default_cache() elif cache is not False and cache is not None: return cache + return None @staticmethod def make_default_cache(): @@ -192,7 +193,7 @@ def step_timedelta(self): v = self.get("step", None) return to_timedelta(v) - def message(self, deflate=False): + def message(self, deflate=False, owner=None): r"""Return a buffer containing the encoded message. Returns @@ -201,6 +202,14 @@ def message(self, deflate=False): """ if deflate: return self._handle.deflate().get_buffer() + elif owner is not None: + data = owner._components.get("data") + if not hasattr(data, "handle"): + data = owner.to_numpy(flatten=True) + handle = self._handle.clone() + handle.set_values(data) + return handle.get_buffer() + return self._handle.get_buffer() def as_namespace(self, namespace): @@ -231,10 +240,11 @@ def sync(self, owner): handle_new = v.handle if handle_new: - self._handle = handle_new for k, v in owner._components.items(): - if hasattr(v, "handle") and hasattr(v, "from_handle") and v.handle is not self.handle: + if hasattr(v, "handle") and hasattr(v, "from_handle") and v.handle is not handle_new: owner._components[k] = v.from_handle(handle_new) + return GribMetadata(handle_new, extra_keys=self.extra_keys, cache=True if self._cache is not None else None) + return self def get_extra_key(self, key, default=None): if self.extra_keys is not None: diff --git a/src/earthkit/data/field/handler/data.py b/src/earthkit/data/field/handler/data.py index e2507bde6..f0f1adb92 100644 --- 
a/src/earthkit/data/field/handler/data.py +++ b/src/earthkit/data/field/handler/data.py @@ -184,7 +184,7 @@ def get_grib_context(self, context): """Get the GRIB context for the data component of the field.""" from earthkit.data.field.grib.data import COLLECTOR - COLLECTOR.collect_keys(self, context) + COLLECTOR.collect(self, context) @classmethod def create_empty(cls) -> "DataFieldComponentHandler": diff --git a/src/earthkit/data/targets/file_pattern.py b/src/earthkit/data/targets/file_pattern.py index 348cef044..3b2d3c78c 100644 --- a/src/earthkit/data/targets/file_pattern.py +++ b/src/earthkit/data/targets/file_pattern.py @@ -88,9 +88,16 @@ def _f(self, data): def _convert(v): if v is None: return "None" - return str(v) + # return str(v) + return v + + # print("self.split_output", self.split_output) + # for k in self.split_output: + # print("split", k.split(":")) + # print("data", data.get(k.split(":")[0])) keys = [_convert(data.get(k.split(":")[0])) for k in self.split_output] + # print("keys", keys) path = self.filename.format(*keys) if path not in self._files: diff --git a/src/earthkit/data/xr_engine/engine.py b/src/earthkit/data/xr_engine/engine.py index 26ebf7531..a2df8db88 100644 --- a/src/earthkit/data/xr_engine/engine.py +++ b/src/earthkit/data/xr_engine/engine.py @@ -502,9 +502,9 @@ def _reference_field(self): try: if message: - from earthkit.data.field.grib.create import create_grib_field_from_buffer + from earthkit.data.field.grib.create import create_grib_field_from_message - return create_grib_field_from_buffer(message) + return create_grib_field_from_message(message) except Exception as e: raise ValueError( ( diff --git a/tests/grib/test_grib_message.py b/tests/grib/test_grib_message.py new file mode 100644 index 000000000..18814dfd3 --- /dev/null +++ b/tests/grib/test_grib_message.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. 
+# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + + +import numpy as np +import pytest +from grib_fixtures import ( + FL_FILE, # noqa: E402 + load_grib_data, # noqa: E402 +) + +from earthkit.data.readers.grib.handle import GribCodesHandle + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_core(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + v = f[0].message() + assert len(v) == 316 + assert v[:4] == b"GRIB" + v = f[1].message() + assert len(v) == 316 + assert v[:4] == b"GRIB" + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_change_values(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + m = f[0].message() + handle = GribCodesHandle.from_message(m) + assert handle.get("shortName") == "2t" + + # modify the values + f1 = f[0].set(values=f[0].values + 1) + m1 = f1.message() + assert m1[:4] == b"GRIB" + handle1 = GribCodesHandle.from_message(m1) + assert handle1.get("shortName") == "2t" + assert np.allclose(handle1.get_values(), f[0].values + 1) + + # the original field/handle is not modified + assert np.allclose(f[0].values, handle.get_values()) + assert np.allclose(handle.get_values(), f[0].values) + + +@pytest.mark.parametrize("fl_type", FL_FILE) +def test_grib_message_change_values_and_metadata(fl_type): + f, _ = load_grib_data("test.grib", fl_type) + m = f[0].message() + handle = GribCodesHandle.from_message(m) + assert handle.get("shortName") == "2t" + + # modify the values and the metadata. The grib handle in the field + # becomes out of sync with the field metadata, and the message cannot be generated. 
+ f1 = f[0].set({"parameter.variable": "msl"}, values=f[0].values + 1) + m1 = f1.message() + assert m1 is None + + # the handle is no longer valid + assert f1._get_grib(strict=True) is None + + # the original handle is still kept, but we need to use strict=False to get + # it, as the metadata is now inconsistent with the handle + md = f1._get_grib(strict=False) + assert md is not None + assert md.get("shortName") == "2t" + assert np.allclose(md.handle.get_values(), f[0].values) diff --git a/tests/grib/test_grib_set.py b/tests/grib/test_grib_set.py index b55a1dcaa..092cc225d 100644 --- a/tests/grib/test_grib_set.py +++ b/tests/grib/test_grib_set.py @@ -42,11 +42,11 @@ def test_grib_set_detailed(fl_type): }) assert f.get("parameter.variable") == "q" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 600 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "q") - assert f.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) assert f.get("labels.my_shape") == (181, 360) assert f.get("labels.my_name") == "t_500" @@ -100,14 +100,14 @@ def test_grib_set_detailed(fl_type): }) assert f.get("parameter.variable") == "pt" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 800 - assert f.get("metadata.level") == 500 - assert f.get("metadata.levelist") == 500 + assert f.get("metadata.level") is None + assert f.get("metadata.levelist") is None # TODO: this should be 800 # assert f.metadata("levelist") == 700 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "pt") - assert f.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) + assert 
f.get(("metadata.date", "parameter.variable")) == (None, "pt") + assert f.get(("parameter.variable", "metadata.date")) == ("pt", None) # assert np.allclose(np.array(f.metadata("mars_area")), np.array([90.0, 0.0, -90.0, 359.0])) assert f.get("labels.my_name") == "t_500" @@ -126,9 +126,9 @@ def test_grib_set_detailed(fl_type): ds = FieldList.from_fields(fields) assert ds.get("parameter.variable") == ["q", "q"] - assert ds.get("metadata.shortName") == ["t", "z"] + assert ds.get("metadata.shortName") == [None, None] assert ds.get("vertical.level") == [600, 600] - assert ds.get("metadata.levelist") == [500, 500] + assert ds.get("metadata.levelist") == [None, None] # write back to grib with temp_file() as tmp: @@ -167,11 +167,11 @@ def test_grib_set_combined(fl_type): }) assert f.get("parameter.variable") == "q" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 600 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "q") - assert f.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) assert np.allclose(f.values, vals_ori + 1) assert np.allclose(ds_ori[0].values, vals_ori) @@ -202,11 +202,11 @@ def test_grib_set_combined(fl_type): }) assert f.get("parameter.variable") == "pt" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("vertical.level") == 800 - assert f.get("metadata.levelist") == 500 - assert f.get(("metadata.date", "parameter.variable")) == (20070101, "pt") - assert f.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "pt") + assert f.get(("parameter.variable", 
"metadata.date")) == ("pt", None) assert np.allclose(f.values, vals_ori + 2) assert np.allclose(ds_ori[0].values, vals_ori) @@ -226,9 +226,9 @@ def test_grib_set_combined(fl_type): ds = FieldList.from_fields(fields) assert ds.get("parameter.variable") == ["q", "q"] - assert ds.get("metadata.shortName") == ["t", "z"] + assert ds.get("metadata.shortName") == [None, None] assert ds.get("vertical.level") == [600, 600] - assert ds.get("metadata.levelist") == [500, 500] + assert ds.get("metadata.levelist") == [None, None] assert np.allclose(ds[0].values, vals_ori + 1) assert np.allclose(ds[1].values, vals_ori + 2) diff --git a/tests/grib/test_grib_set_data.py b/tests/grib/test_grib_set_data.py index c3efef941..dca494858 100644 --- a/tests/grib/test_grib_set_data.py +++ b/tests/grib/test_grib_set_data.py @@ -36,6 +36,13 @@ def test_grib_set_data(fl_type): assert np.allclose(f.values, vals_ori + 1) assert np.allclose(ds_ori[0].values, vals_ori) + # # the field still stores the original GRIB metadata as private metadata, + # # which is hidden but used when writing back to GRIB + # grib_md = f._get_grib() + # assert grib_md.get("shortName") == "t" + # assert grib_md.get("levelist") == 500 + # assert grib_md.get("date") == 20070101 + # write back to grib with temp_file() as tmp: f.to_target("file", tmp) @@ -62,6 +69,13 @@ def test_grib_set_data(fl_type): assert np.allclose(f.values, vals_ori + 2) assert np.allclose(ds_ori[0].values, vals_ori) + # # the field still stores the original GRIB metadata as private metadata, + # # which is hidden but used when writing back to GRIB + # grib_md = f._get_grib() + # assert grib_md.get("shortName") == "t" + # assert grib_md.get("levelist") == 500 + # assert grib_md.get("date") == 20070101 + # --------------- # fieldlist # --------------- diff --git a/tests/grib/test_grib_set_ensemble.py b/tests/grib/test_grib_set_ensemble.py index 1f7442acd..6a3c96566 100644 --- a/tests/grib/test_grib_set_ensemble.py +++ 
b/tests/grib/test_grib_set_ensemble.py @@ -20,7 +20,7 @@ # @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize( - "_kwargs,ref_ori, ref_set,ref_saved", + "_kwargs,ref_ori, ref_set,ref_grib,ref_saved", [ ( {"ensemble.member": 3}, @@ -31,8 +31,12 @@ }, { "ensemble.member": "3", - "metadata.number": 1, - "metadata.level": 850, + "metadata.number": None, + "metadata.level": None, + }, + { + "number": 1, + "level": 850, }, { "ensemble.member": "3", @@ -42,7 +46,7 @@ ), ], ) -def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_saved): +def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_grib, ref_saved): ds_ori, _ = load_grib_data("ens_50.grib", fl_type, folder="data") f = ds_ori[0].set(**_kwargs) @@ -54,6 +58,12 @@ def test_grib_set_ensemble(fl_type, _kwargs, ref_ori, ref_set, ref_saved): for k, v in ref_ori.items(): assert ds_ori[0].get(k) == v + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in ref_grib.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist() diff --git a/tests/grib/test_grib_set_parameter.py b/tests/grib/test_grib_set_parameter.py index 517d54b91..3ef96daad 100644 --- a/tests/grib/test_grib_set_parameter.py +++ b/tests/grib/test_grib_set_parameter.py @@ -20,16 +20,21 @@ # @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize( - "_kwargs,ref1,ref2", + "_kwargs,ref1,ref_grib,ref2", [ ( {"parameter.variable": "q", "parameter.units": "kg/kg"}, { "parameter.variable": "q", - "metadata.param": "t", - "metadata.shortName": "t", + "metadata.param": None, + "metadata.shortName": None, "parameter.units": "kg/kg", - "metadata.units": "K", + "metadata.units": None, + }, + 
{ + "param": "t", + "shortName": "t", + "units": "K", }, { "parameter.variable": "q", @@ -45,7 +50,7 @@ # ), ], ) -def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref2): +def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref_grib, ref2): ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set(**_kwargs) @@ -53,6 +58,12 @@ def test_grib_set_parameter_1(fl_type, _kwargs, ref1, ref2): for k, v in ref1.items(): assert f.get(k) == v + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in ref_grib.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist() @@ -69,6 +80,6 @@ def test_grib_set_parameter_2( f = ds_ori[0].set({"parameter.variable": "ta", "parameter.units": "kg/kg"}) assert f.get("parameter.variable") == "ta" - assert f.get("metadata.shortName") == "t" + assert f.get("metadata.shortName") is None assert f.get("parameter.units") == "kg/kg" - assert f.get("metadata.units") == "K" + assert f.get("metadata.units") is None diff --git a/tests/grib/test_grib_set_vertical.py b/tests/grib/test_grib_set_vertical.py index c93679da9..c7a2a3e80 100644 --- a/tests/grib/test_grib_set_vertical.py +++ b/tests/grib/test_grib_set_vertical.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("fl_type", ["file"]) @pytest.mark.parametrize("write_method", ["target"]) @pytest.mark.parametrize( - "_kwargs,ref1,ref2", + "_kwargs,ref1,grib_ref,ref2", [ ( { @@ -33,10 +33,16 @@ "vertical.level_type": "potential_temperature", "vertical.units": "K", "vertical.abbreviation": "pt", - "metadata.levelist": 500, - "metadata.level": 500, - "metadata.levtype": "pl", - "metadata.typeOfLevel": "isobaricInhPa", + "metadata.levelist": None, + "metadata.level": None, + "metadata.levtype": None, + "metadata.typeOfLevel": None, + }, + { + "levelist": 500, + "level": 500, + "levtype": "pl", + 
"typeOfLevel": "isobaricInhPa", }, { "vertical.level": 320, @@ -73,7 +79,7 @@ # ), ], ) -def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, ref2): +def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, grib_ref, ref2): ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set(**_kwargs) @@ -85,6 +91,12 @@ def test_grib_set_vertical(fl_type, write_method, _kwargs, ref1, ref2): assert ds_ori[0].get("vertical.level") == 500 assert ds_ori[0].get("vertical.level_type") == "pressure" + # the field still stores the original GRIB metadata as private metadata, + # which is hidden but used when writing back to GRIB + grib_md = f._get_grib() + for k, v in grib_ref.items(): + assert grib_md.get(k) == v + with temp_file() as tmp: f.to_target("file", tmp) f_saved = from_source("file", tmp).to_fieldlist() From 7133e120c2a8cfdab85c86a203b8d76e9b403e37 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 21 Apr 2026 15:09:07 +0100 Subject: [PATCH 2/8] Disable grib access in modified fields --- tests/encoders/test_grib_encoder.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/encoders/test_grib_encoder.py diff --git a/tests/encoders/test_grib_encoder.py b/tests/encoders/test_grib_encoder.py new file mode 100644 index 000000000..18ccfde76 --- /dev/null +++ b/tests/encoders/test_grib_encoder.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + + +import numpy as np + +from earthkit.data import create_encoder, from_source +from earthkit.data.utils.testing import earthkit_examples_file + + +def test_grib_encoder(): + f = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist()[0] + + encoder = create_encoder("grib") + r = encoder.encode(f) + + assert r.to_bytes() == f.message() + + f_r = r.to_field() + assert f is not f_r + assert f.message() == f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("parameter.variable") == f_r.get("parameter.variable") From 5ea6fd5eaebdd1aff658963f4e33f9f82a1e0264 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 21 Apr 2026 16:16:44 +0100 Subject: [PATCH 3/8] Disable grib access in modified fields --- src/earthkit/data/field/grib/create.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/earthkit/data/field/grib/create.py b/src/earthkit/data/field/grib/create.py index 34f5dd2d5..1822943cb 100644 --- a/src/earthkit/data/field/grib/create.py +++ b/src/earthkit/data/field/grib/create.py @@ -81,3 +81,6 @@ def create_grib_field_from_message(buf): from earthkit.data.readers.grib.handle import MemoryGribHandle return create_grib_field(MemoryGribHandle.from_message(buf), cache=False) + + +create_grib_field_from_buffer = create_grib_field_from_message From d30b248d73ea9501fb4eb2f70551805ba7f524e2 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 22 Apr 2026 15:52:32 +0100 Subject: [PATCH 4/8] Disable grib access in modified fields --- src/earthkit/data/core/field.py | 8 +- src/earthkit/data/encoders/grib.py | 6 +- src/earthkit/data/field/grib/create.py | 22 +-- src/earthkit/data/utils/message.py | 12 +- tests/encoders/test_grib_encoder.py | 208 ++++++++++++++++++++++++- 5 files changed, 232 insertions(+), 24 deletions(-) diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index e4c163c6d..4260f6a60 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1368,7 
+1368,7 @@ def set(self, *args, **kwargs): Notes ----- When the field was created from a GRIB message, calling :meth:`set` does not modify the original - GRIB message and the new field returned by :meth:`set` is not linked to a GRIB message. In the new field + GRIB message and the new field is not linked to a GRIB message. In the new field the GRIB message/handle will not be available and the GRIB specific keys in the raw metadata will not be accessible. @@ -1383,10 +1383,10 @@ def set(self, *args, **kwargs): >>> f1.message() None - However, if only the "data" or "values" key is used in :meth:`set` to set new data values, the new + However, if only the labels or the values are set (the latter via the "data" or "values" keys), the new field returned by :meth:`set` is still linked to the original GRIB message and the GRIB specific keys - in the raw metadata are still accessible. When calling :meth:`message` on the new field, the original GRIB - message with the modified data values is returned. + in the raw metadata are still accessible. If the values were modified, when calling :meth:`message` on the + new field, the original GRIB message updated with the modified data values is returned. 
>>> import earthkit.data as ekd >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index 7608a370d..03cdd761a 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -205,12 +205,12 @@ def _result(handle): if handle is not None: return _result(handle) # GRIB sample as string - elif isinstance(handle, str): + elif isinstance(template, str): handle = GribCodesHandle.from_sample(template) if handle is not None: return _result(handle) # raw ecCodes handle - else: + elif isinstance(template, int): try: handle = GribCodesHandle._from_raw_handle(template) if handle is not None: @@ -606,7 +606,7 @@ def _encode_field(self, field, *, target=None, values=None, template=None, metad if values is None: values = field_values - if values is None and template is None: + if values is None: values = field.values if template is None: diff --git a/src/earthkit/data/field/grib/create.py b/src/earthkit/data/field/grib/create.py index 1822943cb..95c2664f3 100644 --- a/src/earthkit/data/field/grib/create.py +++ b/src/earthkit/data/field/grib/create.py @@ -11,7 +11,7 @@ from earthkit.data.field.handler.data import ArrayDataFieldComponentHandler -def create_grib_field(handle, data=None, cache=False, extra_keys=None): +def create_grib_field(handle, data=None, cache=False, extra_keys=None, template_field=None): from earthkit.data.core.field import Field from earthkit.data.field.grib.data import GribData from earthkit.data.field.grib.ensemble import GribEnsemble @@ -22,19 +22,9 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): from earthkit.data.field.grib.time import GribTime from earthkit.data.field.grib.vertical import GribVertical - # from earthkit.data.specs.labels import SimpleLabels - if data is None: data = GribData(handle) - # parameter = GribParameter(handle) - # time = GribTime(handle) - # geography = GribGeography(handle) - # 
vertical = GribVertical(handle) - # labels = SimpleLabels() - # ensemble = GribEnsemble(handle) - # grib = GribLabels(handle) - time = GribTime(handle) geography = GribGeographyHandler(handle) vertical = GribVertical(handle) @@ -43,6 +33,10 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): parameter = GribParameter(handle) grib = GribMetadata(handle, extra_keys=extra_keys, cache=cache) + labels = None + if template_field is not None: + labels = template_field.labels + r = Field( data=data, parameter=parameter, @@ -51,7 +45,7 @@ def create_grib_field(handle, data=None, cache=False, extra_keys=None): vertical=vertical, ensemble=ensemble, proc=proc, - # labels=labels, + labels=labels, ) r._set_private_data("metadata", grib) @@ -77,10 +71,10 @@ def _add(key, default=None): return new_field -def create_grib_field_from_message(buf): +def create_grib_field_from_message(buf, template_field=None): from earthkit.data.readers.grib.handle import MemoryGribHandle - return create_grib_field(MemoryGribHandle.from_message(buf), cache=False) + return create_grib_field(MemoryGribHandle.from_message(buf), template_field=template_field, cache=False) create_grib_field_from_buffer = create_grib_field_from_message diff --git a/src/earthkit/data/utils/message.py b/src/earthkit/data/utils/message.py index 4cb55c945..b388dab56 100644 --- a/src/earthkit/data/utils/message.py +++ b/src/earthkit/data/utils/message.py @@ -198,7 +198,9 @@ def from_sample(cls, name): return cls(eccodes.codes_new_from_samples(name, cls.PRODUCT_ID), None, None) @classmethod - def _from_raw_handle(cls, handle): + def _from_raw_handle(cls, handle, clone=False): + if clone: + handle = eccodes.codes_clone(handle) return cls(handle, None, None) @classmethod @@ -213,6 +215,14 @@ def _raw_handle_from_file(cls, fp): def _raw_handle_from_message(cls, message): return eccodes.codes_new_from_message(message, cls.PRODUCT_ID) + def _raw_handle(self, clone=True): + # TODO: review if clone can be False at 
all. This object is managing the + # raw ecCodes handle and if clone is False, can cause issues. + if clone: + return eccodes.codes_clone(self._handle) + else: + return self._handle + # TODO: just a wrapper around the base class implementation to handle the # s,l,d qualifiers. Once these are implemented in the base class this method can # be removed. md5GridSection is also handled! diff --git a/tests/encoders/test_grib_encoder.py b/tests/encoders/test_grib_encoder.py index 18ccfde76..8837260e2 100644 --- a/tests/encoders/test_grib_encoder.py +++ b/tests/encoders/test_grib_encoder.py @@ -9,18 +9,24 @@ # nor does it submit to any jurisdiction. # +import datetime import numpy as np +import pytest from earthkit.data import create_encoder, from_source from earthkit.data.utils.testing import earthkit_examples_file -def test_grib_encoder(): +@pytest.mark.parametrize("_args,_kwargs", [(("",), {}), ((), {"data": ""}), ((), {"template": ""})]) +def test_grib_encoder_field_1(_args, _kwargs): f = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist()[0] + _args = tuple(f if v == "" else v for v in _args) + _kwargs = {k: (f if v == "" else v) for k, v in _kwargs.items()} + encoder = create_encoder("grib") - r = encoder.encode(f) + r = encoder.encode(*_args, **_kwargs) assert r.to_bytes() == f.message() @@ -29,3 +35,201 @@ def test_grib_encoder(): assert f.message() == f_r.message() assert np.allclose(f.values, f_r.values) assert f.get("parameter.variable") == f_r.get("parameter.variable") + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_template_only(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == 
"handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + # this is the clone of the raw handle + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(**encode_kwargs) + + assert r.to_bytes() == template.message() + + f_r = r.to_field() + assert f_r.message() is not None + assert template.message() == f_r.message() + assert np.allclose(template.values, f_r.values) + assert f_r.get("parameter.variable") == "msl" + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_data_and_template(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == "handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert f.get("parameter.variable") == "2t" + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(data=f, **encode_kwargs) + + assert r.to_bytes() != 
f.message() + + f_r = r.to_field() + assert f is not f_r + assert f_r.message() is not None + assert f.message() != f_r.message() + assert f_r.message() == r.to_bytes() + assert np.allclose(f.values, f_r.values) + assert f.get("parameter.variable") == "2t" + assert f_r.get("parameter.variable") == "msl" + + +@pytest.mark.parametrize("init_encoder", [None, ["template"]]) +@pytest.mark.parametrize("template_arg", ["field", "message", "handle", "raw_handle"]) +def test_grib_encoder_field_values_and_template(init_encoder, template_arg): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + template = fl[1] + + if template_arg == "message": + template_arg = template.message() + elif template_arg == "field": + template_arg = template + elif template_arg == "handle": + template_arg = template._get_grib().handle + elif template_arg == "raw_handle": + template_arg = template._get_grib().handle._raw_handle() + else: + raise ValueError(f"Invalid template_arg: {template_arg}") + + assert f.get("parameter.variable") == "2t" + assert template.get("parameter.variable") == "msl" + + encoder_kwargs = {} + encode_kwargs = {"template": template_arg} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(values=vals, **encode_kwargs) + + assert r.to_bytes() != f.message() + + f_r = r.to_field() + assert f is not f_r + assert f_r.message() is not None + assert f.message() != f_r.message() + assert f_r.message() == r.to_bytes() + assert np.allclose(f.values + 1.0, f_r.values) + assert f.get("parameter.variable") == "2t" + assert f_r.get("parameter.variable") == "msl" + + +def test_grib_encoder_field_data_and_values_and_template(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + template = fl[1] + + 
encoder = create_encoder("grib") + with pytest.raises(ValueError): + encoder.encode(data=f, values=vals, template=template) + + +@pytest.mark.parametrize("init_encoder", [None, ["template", "metadata"], ["template"], ["metadata"]]) +def test_grib_encoder_field_metadata_1(init_encoder): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder_kwargs = {} + encode_kwargs = {"template": f, "metadata": {"date": 19980502}} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(data=f, **encode_kwargs) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 12) + + +def test_grib_encoder_field_metadata_2(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder = create_encoder("grib", metadata={"time": 0}) + r = encoder.encode(data=f, template=f, metadata={"date": 19980502}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + + +def test_grib_encoder_field_metadata_3(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + + encoder = create_encoder("grib", metadata={"time": 0}) + r = encoder.encode(values=vals, template=f, metadata={"date": 19980502}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values + 1.0, f_r.values) + assert f.get("time.base_datetime") 
== datetime.datetime(2020, 5, 13, 12, 0) + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) From 132ef308c96ed82c7a15f1495d2ea91cad272bed Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 Apr 2026 11:06:35 +0100 Subject: [PATCH 5/8] Disable grib access in modified fields --- .../how-tos/grib/grib_modify_metadata.ipynb | 2136 +++++++++-------- .../how-tos/grib/grib_modify_values.ipynb | 2 +- docs/source/how-tos/target/grib_encoder.ipynb | 8 +- .../target/grib_to_file_pattern_target.ipynb | 2 +- .../how-tos/target/grib_to_file_target.ipynb | 2 +- src/earthkit/data/core/field.py | 38 + src/earthkit/data/encoders/grib.py | 41 +- src/earthkit/data/targets/file_pattern.py | 7 - 8 files changed, 1180 insertions(+), 1056 deletions(-) diff --git a/docs/source/how-tos/grib/grib_modify_metadata.ipynb b/docs/source/how-tos/grib/grib_modify_metadata.ipynb index 4de20e4b2..ac900ea3c 100644 --- a/docs/source/how-tos/grib/grib_modify_metadata.ipynb +++ b/docs/source/how-tos/grib/grib_modify_metadata.ipynb @@ -1,1006 +1,1136 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "5d21c14f-b851-4437-a1f1-1daac37a9e49", - "metadata": {}, - "source": [ - "# GRIB: modifying metadata" - ] - }, - { - "cell_type": "markdown", - "id": "ac126208-e64e-4658-b69a-83e9b387464b", - "metadata": {}, - "source": [ - "This notebook demonstrates how to modify the metadata in GRIB fields.\n", - "\n", - "First we read some GRIB data containing pressure level fields." 
- ] - }, - { - "cell_type": "markdown", - "id": "0edd73c7-9358-47cd-96b4-cf1e0d1cb720", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "c11cc522-d388-4f39-a2d4-2f7b4c03517e", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3ff5ce581718401b922612b489722de1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", - "" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f = fl[0]\n", - "f.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "d3f4f433-35f4-40b0-a100-d405205dd1bc", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## Using set()" - ] - }, - { - "cell_type": "raw", - "id": "efd58137-2499-4929-86f0-211444c3a152", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "A field can be modified by using :py:meth:`~earthkit.data.core.field.Field.set`. It will create a new field with updated metadata." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "784a62cc-4534-44e4-af1a-5475ab82afdb", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 500 pressure 0 regular_ll " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"parameter.variable\": \"u\", \"parameter.units\": \"m/s\", \"vertical.level\": 500})\n", - "f1.ls()" - ] - }, - { - "cell_type": "raw", - "id": "296ef80e-510f-482c-bba3-99f353b5d4b5", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. since the field was created from GRIB data it has the raw (GRIB) metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "799fcb48-8860-442c-86dc-16054146ef19", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "t\n", - "'Key metadata.shortName cannot be set on the field.'\n" - ] - } - ], - "source": [ - "print(f.get(\"metadata.shortName\"))\n", - "try:\n", - " f.set({\"metadata.shortName\": \"u\"})\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "bbeceb32-7cde-462c-8af1-36c10302530c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## Setting time" - ] - }, - { - "cell_type": "markdown", - "id": "fb770237-20f6-453f-9fd6-6bcedea06a0d", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Setting keys for the time field component allows using multiple formats. By default a \"datetime\" key takes a datatime.datetime object and a \"step\" key takes a datatime.timedelta object." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "77201dea-07e3-4e44-93d6-3220f249ea83", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.base_datetime\": datetime.datetime(2000, 12, 18, 12), \"time.step\": datetime.timedelta(hours=6)})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "e95e5812-d436-4d1f-9f5d-1c0e962cfa82", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "On top of that, we can also use many compatible formats, e.g:\n", - " - for datetime: ISO date strings, numpy datetime64 values, integers as yyyymmdd (the hour is assumed to be 0 in this case)\n", - " - for timedelta: integers (as hours), strings like \"6s\", \"6m\", \"6h\" (for seconds, minutes or hours)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "30a2c42b-56b9-42a4-ac22-f78d0bfe6c44", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.base_datetime\": \"2000-12-18T12\", \"time.step\": 6})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "82e0d38a-b766-46f3-b55f-68406dbb26d1", - "metadata": {}, - "source": [ - "Setting the step will automatically update the a valid time too." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b384219d-0d52-4753-801e-39c6879754da", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:102018-08-01 12:00:000 days 00:00:101000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:10 2018-08-01 12:00:00 0 days 00:00:10 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"time.step\": \"10s\"})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "cd3279a4-0a10-4ad7-abd1-864d1cabfe6c", - "metadata": {}, - "source": [ - "## Setting components" - ] - }, - { - "cell_type": "raw", - "id": "bcf1b39a-f7e3-4d62-8a52-92e8cc3aca8a", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "It is allowed to set individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e05d2d32-6e0a-41b7-bc6f-889b01856884", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set(time={\"base_datetime\": \"2000-12-18T12\", \"step\": 6})\n", - "f1.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "c43eb34e-a058-43bc-bd99-0b1b56ed0dd3", - "metadata": {}, - "source": [ - "If the dict is not fully specifying the component an exception is raised. E.g. \"step\" on it is own does not define a time component." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "758a4ee9-23a9-45ce-91d7-9af9d1c6a930", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cannot create ForecastTime from keys: ['step'].\n" - ] - } - ], - "source": [ - "try:\n", - " f.set(time={\"step\": 6})\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "90ba2625-e203-4e09-a072-4dcdc438819b", - "metadata": {}, - "source": [ - "## Saving the modified field to disk" - ] - }, - { - "cell_type": "markdown", - "id": "4926dcde-b9ef-47b1-8c51-fb140e72a015", - "metadata": {}, - "source": [ - "We change the level and save the modified field into a GRIB file." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f01193d1-cfbb-42fb-8f73-b11ab0b1737f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 500 pressure 0 regular_ll " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1 = f.set({\"vertical.level\": 500})\n", - "f1.to_target(\"file\", \"_res_lev.grib\")\n", - "\n", - "# read back the data and compare the values in the first field\n", - "f1_w = ekd.from_source(\"file\", \"_res_lev.grib\").to_fieldlist()\n", - "f1_w.ls()" - ] - }, - { - "cell_type": "markdown", - "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", - "metadata": {}, - "source": [ - "## Modified fields and the associated GRIB message" - ] - }, - { - "cell_type": "raw", - "id": "56528846-a9e1-43a8-a299-e33b64aabd55", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "When a field was created from GRIB data the associated GRIB message can be accessed via the field with :func:`~earthkit.data.field.Field.message`." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e403ad78-821d-4b14-8cae-4b2afef55093", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "b'GRIB\\x00\\x00\\x96\\x01\\x00\\x00'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f.message()[:10]" - ] - }, - { - "cell_type": "markdown", - "id": "d9aa4f53-01e1-46de-a0c1-ebb87970cf38", - "metadata": {}, - "source": [ - "Having modified the field metadata this GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8cddbcda-ea5c-4482-baa7-3535f94752a6", - "metadata": {}, - "outputs": [], - "source": [ - "f1 = f.set({\"vertical.level\": 500})\n", - "f1.message()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "8750f04b-b106-493b-96ad-2c02c413137d", - "metadata": {}, - "outputs": [], - "source": [ - "f1.get(\"metadata.shortName\")" - ] - }, - { - "cell_type": "markdown", - "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", - "metadata": {}, - "source": [ - "## Changing raw GRIB metadata" - ] - }, - { - "cell_type": "raw", - "id": "89482515-0cf7-44ce-a267-c78e8bd0a6a6", - "metadata": { - "editable": true, - "raw_mimetype": "text/restructuredtext", - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "Currently, changing the (raw) GRIB metadata in a field requires the usage of a :py:class:`~earthkit.data.encoders.grib.GribEncoder`. \n", - "When we call its :py:meth:`~earthkit.data.encoders.grib.GribEncoder.encode` method it will clone the underlying GRIB message, set the GRIB metadata on it and return an object that can be converted to a field." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "82e780fe-3539-4af9-ad4f-585739577dcc", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", - "
" - ], - "text/plain": [ - " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", - "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", - "\n", - " vertical.level vertical.level_type ensemble.member geography.grid_type \n", - "0 1000 pressure 0 regular_ll " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "encoder = ekd.create_encoder(\"grib\")\n", - "r = encoder.encode(template=f, metadata={\"shortName\": \"u\"})\n", - "f1 = r.to_field()\n", - "f1.ls()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60b11297-7499-445d-9b71-2d59709aed25", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.1" - } + "cells": [ + { + "cell_type": "markdown", + "id": "5d21c14f-b851-4437-a1f1-1daac37a9e49", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" }, - "nbformat": 4, - "nbformat_minor": 5 + "tags": [] + }, + "source": [ + "# GRIB: modifying metadata" + ] + }, + { + "cell_type": "markdown", + "id": "ac126208-e64e-4658-b69a-83e9b387464b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This notebook demonstrates how to modify the metadata in GRIB fields.\n", + "\n", + "First we read some GRIB data containing pressure level fields." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c11cc522-d388-4f39-a2d4-2f7b4c03517e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "404db52ddd70458886774e0b6edad7bf", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f = fl[0]\n", + "f.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "d3f4f433-35f4-40b0-a100-d405205dd1bc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Using set()" + ] + }, + { + "cell_type": "raw", + "id": "efd58137-2499-4929-86f0-211444c3a152", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "A field can be modified by using :py:meth:`~earthkit.data.core.field.Field.set`. It will create a new field with updated metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "784a62cc-4534-44e4-af1a-5475ab82afdb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"parameter.variable\": \"u\", \"parameter.units\": \"m/s\", \"vertical.level\": 500})\n", + "f1.ls()" + ] + }, + { + "cell_type": "raw", + "id": "296ef80e-510f-482c-bba3-99f353b5d4b5", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set` and raw metadata keys are not allowed to use. E.g. since the field was created from GRIB data it has the raw (GRIB) metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata see the \"Changing raw GRIB metadata\" section below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "799fcb48-8860-442c-86dc-16054146ef19", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t\n", + "'Key metadata.shortName cannot be set on the field.'\n" + ] + } + ], + "source": [ + "print(f.get(\"metadata.shortName\"))\n", + "try:\n", + " f.set({\"metadata.shortName\": \"u\"})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "bbeceb32-7cde-462c-8af1-36c10302530c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Setting time" + ] + }, + { + "cell_type": "markdown", + "id": "fb770237-20f6-453f-9fd6-6bcedea06a0d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Setting keys for the \"time\" field component allows using multiple formats. By default a \"datetime\" key takes a datetime.datetime object and a \"step\" key takes a datetime.timedelta object." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "77201dea-07e3-4e44-93d6-3220f249ea83", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": datetime.datetime(2000, 12, 18, 12), \"time.step\": datetime.timedelta(hours=6)})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e95e5812-d436-4d1f-9f5d-1c0e962cfa82", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "On top of that, we can also use many compatible formats, e.g:\n", + " - for datetime: ISO date strings, numpy datetime64 values, integers as yyyymmdd (the hour is assumed to be 0 in this case)\n", + " - for timedelta: integers (as hours), strings like \"6s\", \"6m\", \"6h\" (for seconds, minutes or hours)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "30a2c42b-56b9-42a4-ac22-f78d0bfe6c44", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": \"2000-12-18T12\", \"time.step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "82e0d38a-b766-46f3-b55f-68406dbb26d1", + "metadata": {}, + "source": [ + "Setting the step will automatically update the valid time too." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b384219d-0d52-4753-801e-39c6879754da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:102018-08-01 12:00:000 days 00:00:101000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:10 2018-08-01 12:00:00 0 days 00:00:10 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.step\": \"10s\"})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "cd3279a4-0a10-4ad7-abd1-864d1cabfe6c", + "metadata": {}, + "source": [ + "## Setting components" + ] + }, + { + "cell_type": "raw", + "id": "bcf1b39a-f7e3-4d62-8a52-92e8cc3aca8a", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "It is allowed to set whole individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e05d2d32-6e0a-41b7-bc6f-889b01856884", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set(time={\"base_datetime\": \"2000-12-18T12\", \"step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "c43eb34e-a058-43bc-bd99-0b1b56ed0dd3", + "metadata": {}, + "source": [ + "If the dict does not fully specify the component an exception is raised. E.g. \"step\" on its own does not define a time component." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "758a4ee9-23a9-45ce-91d7-9af9d1c6a930", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot create ForecastTime from keys: ['step'].\n" + ] + } + ], + "source": [ + "try:\n", + " f.set(time={\"step\": 6})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "90ba2625-e203-4e09-a072-4dcdc438819b", + "metadata": {}, + "source": [ + "## Saving the modified field to disk" + ] + }, + { + "cell_type": "markdown", + "id": "4926dcde-b9ef-47b1-8c51-fb140e72a015", + "metadata": {}, + "source": [ + "We change the level and save the modified field into a GRIB file." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f01193d1-cfbb-42fb-8f73-b11ab0b1737f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.to_target(\"file\", \"_res_lev.grib\")\n", + "\n", + "# read back the data and compare the values in the first field\n", + "f1_w = ekd.from_source(\"file\", \"_res_lev.grib\").to_fieldlist()\n", + "f1_w.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", + "metadata": {}, + "source": [ + "## Modified fields and the associated GRIB message" + ] + }, + { + "cell_type": "raw", + "id": "56528846-a9e1-43a8-a299-e33b64aabd55", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field was created from GRIB data the associated GRIB message can be accessed via the field with :func:`~earthkit.data.core.field.Field.message`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e403ad78-821d-4b14-8cae-4b2afef55093", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "b'GRIB\\x00\\x00\\x96\\x01\\x00\\x00'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f.message()[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "d9aa4f53-01e1-46de-a0c1-ebb87970cf38", + "metadata": {}, + "source": [ + "Having modified the field metadata this GRIB message is not updated and we cannot access it any longer in the new field. The same is true for any raw GRIB metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8cddbcda-ea5c-4482-baa7-3535f94752a6", + "metadata": {}, + "outputs": [], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.message()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8750f04b-b106-493b-96ad-2c02c413137d", + "metadata": {}, + "outputs": [], + "source": [ + "f1.get(\"metadata.shortName\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5faa2075-b6ae-48a1-a07b-0b83055459e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Key metadata.shortName not found in field'\n" + ] + } + ], + "source": [ + "try:\n", + " f1.metadata(\"shortName\")\n", + "except KeyError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "raw", + "id": "1c32e5c7-fc47-491b-bf2b-476512791831", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If we want to keep a valid associated GRIB message in the modified field we need to call :func:`~earthkit.data.core.field.Field.sync_raw_metadata`. This will create a new GRIB handle, update the relevant metadata in it and create a new field out of it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5a2a63ec-0d95-4b3e-ba13-039f09234c81", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['t', 500]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f1.sync_raw_metadata()\n", + "f1.get([\"metadata.shortName\", \"metadata.level\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f17ffe64-45d4-4bdc-a174-26e8a4fa28d7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['t', 500]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1.metadata([\"shortName\", \"level\"])" + ] + }, + { + "cell_type": "raw", + "id": "af6311d3-74b0-4793-976e-d02a907f5113", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Alternatively, if your workflow is strictly GRIB-bound you can carry out the field modification via the :py:class:`~earthkit.data.encoders.grib.GribEncoder` as shown in the next chapter. " + ] + }, + { + "cell_type": "markdown", + "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Changing raw GRIB metadata" + ] + }, + { + "cell_type": "raw", + "id": "89482515-0cf7-44ce-a267-c78e8bd0a6a6", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Currently, changing the (raw) GRIB metadata in a field requires the usage of a :py:class:`~earthkit.data.encoders.grib.GribEncoder`. 
\n", + "When we call its :py:meth:`~earthkit.data.encoders.grib.GribEncoder.encode` method it will clone the underlying GRIB message, set the GRIB metadata on it and return an object that can be converted to a field." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "82e780fe-3539-4af9-ad4f-585739577dcc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "encoder = ekd.create_encoder(\"grib\")\n", + "r = encoder.encode(template=f, metadata={\"shortName\": \"u\"})\n", + "f1 = r.to_field()\n", + "f1.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60b11297-7499-445d-9b71-2d59709aed25", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/source/how-tos/grib/grib_modify_values.ipynb b/docs/source/how-tos/grib/grib_modify_values.ipynb index 5e444ae3f..a63b4f549 100644 --- a/docs/source/how-tos/grib/grib_modify_values.ipynb +++ b/docs/source/how-tos/grib/grib_modify_values.ipynb @@ -39,7 +39,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3e2a5e8d5fec4ecf8ffa4b5319a2df21", + "model_id": "0a3b6df9fe9e40d8a214a4d43332f8fa", "version_major": 2, "version_minor": 0 }, diff --git a/docs/source/how-tos/target/grib_encoder.ipynb b/docs/source/how-tos/target/grib_encoder.ipynb index 9a092363d..6031fd346 100644 --- a/docs/source/how-tos/target/grib_encoder.ipynb +++ b/docs/source/how-tos/target/grib_encoder.ipynb @@ -60,7 +60,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { 
- "model_id": "4fa9e25c0fba4cc6bdc438143e62e78b", + "model_id": "cf241aa38f354b9db40f7e486bf0edc2", "version_major": 2, "version_minor": 0 }, @@ -152,7 +152,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -196,7 +196,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -927,7 +927,7 @@ { "data": { "text/plain": [ - "(np.float64(4.270901324854242), np.float64(4.270981311798096))" + "(np.float64(4.244827050861318), np.float64(4.244751930236816))" ] }, "execution_count": 15, diff --git a/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb index df08a0c5b..3f698e6d5 100644 --- a/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb +++ b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb @@ -29,7 +29,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d1c0f177bba543729b8ec1b4a7ceaeff", + "model_id": "97df2cbbbd9a49148b26e5c67a9ada59", "version_major": 2, "version_minor": 0 }, diff --git a/docs/source/how-tos/target/grib_to_file_target.ipynb b/docs/source/how-tos/target/grib_to_file_target.ipynb index 8264b272b..809c69388 100644 --- a/docs/source/how-tos/target/grib_to_file_target.ipynb +++ b/docs/source/how-tos/target/grib_to_file_target.ipynb @@ -29,7 +29,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "37999b81f3cf4e33a49aea96cc3d2d5a", + "model_id": "258dac7b98e541a18ffc86acd4561001", "version_major": 2, "version_minor": 0 }, diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 4260f6a60..ac0691368 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1468,6 +1468,44 @@ def _set_values(self, array): data = self._components[_DATA].set_values(array) return Field.from_field(self, data=data) + def sync_raw_metadata(self): + """Return a field with the raw metadata in sync with the field's components. 
+ + When a field is created from a GRIB message, the field stores this associated GRIB message/handle + and the raw metadata is extracted from it. When the field's components are modified using :meth:`set`, + the raw metadata is not automatically updated to reflect the changes in the components and will + become hidden to the user. This method can be used to create a new field with the raw metadata updated + to be consistent with the current state of the field's components. + + Returns + ------- + Field + A field with the raw metadata in sync with the field's components. If the field is not associated with + a GRIB message or if the raw metadata is already in sync, the original field is returned. + + Examples + -------- + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f1 = f.set({"parameter.variable": "msl", "parameter.units": "Pa"}) + >>> f1.get("metadata.shortName") + None + >>> f1.metadata("shortName") + KeyError: 'metadata.shortName' not found in field + >>> f2 = f1.sync_raw_metadata() + >>> f2.get("metadata.shortName") + 'msl' + >>> f2.metadata("shortName") + 'msl' + """ + if self._get_grib() and self._private and "_metadata" in self._private: + from earthkit.data.encoders.grib import GribEncoder + + encoder = GribEncoder() + return encoder.encode(data=self).to_field() + return self + def to_target(self, target, *args, **kwargs): r"""Write the field into a target object. 
diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index 03cdd761a..dfd2fcc83 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -78,10 +78,6 @@ def get(self, key, default=None): def to_field(self): """Convert the GRIB message to a Field object.""" - # from earthkit.data.readers.grib.memory import GribFieldInMemory - - # return GribFieldInMemory.from_buffer(self.to_bytes()) - from earthkit.data.field.grib.create import create_grib_field_from_message return create_grib_field_from_message(self.to_bytes()) @@ -165,11 +161,6 @@ def make(self, values=None, metadata=None, template=None): if handle is not None: self.update_metadata_from_template(metadata, template, handle) - # if handle is None and field is not None: - # handle = self.handle_from_field(field) - # if handle is not None: - # self.update_metadata_from_template(metadata, field, handle) - if handle is None: if values is None: raise ValueError("No values to encode") @@ -262,7 +253,7 @@ def handle_from_metadata(self, values, metadata, compulsory): def update_metadata_from_template(self, metadata, template, handle): return - + # TODO: review this code # the template can contain extra metadata that is not encoded in the handle if "bitsPerValue" in metadata: return @@ -276,34 +267,6 @@ def update_metadata_from_template(self, metadata, template, handle): else: bpv = template_md.get("bitsPerValue", default=None) - # if bpv is None: - - # if hasattr(template, "metadata"): - # template_md = template.metadata() - # from earthkit.data.core.metadata import WrappedMetadata - - # if isinstance(template_md, WrappedMetadata): - # for k in template_md.extra.keys(): - # if k != "bitsPerValue" and k not in metadata: - # metadata[k] = template_md.get(k) - - # if "bitsPerValue" not in metadata: - # bpv = template.metadata("bitsPerValue", default=None) - - # # Either the handle has valid bitsPerValue or has to be extracted - # # from the template and added 
to the metadata to be encoded - # if "bitsPerValue" not in metadata: - # if bpv is None: - # try: - # bpv = template.handle.get("bitsPerValue", None) - # except Exception: - # bpv = None - - # if bpv is not None and bpv > 0: - # bpv_h = handle.get("bitsPerValue", None) - # if bpv != bpv_h: - # metadata["bitsPerValue"] = bpv - def _ll_field(self, values, metadata): Nj, Ni = values.shape metadata["Nj"] = Nj @@ -504,7 +467,7 @@ def encode( -------- See the howto examples for more details and examples of encoding GRIB data with :class:`GribEncoder`. - - :ref:`howtos/target/grib_encoder.ipynb` + - :ref:`/how-tos/target/grib_encoder.ipynb` """ if template is None: template = self.template diff --git a/src/earthkit/data/targets/file_pattern.py b/src/earthkit/data/targets/file_pattern.py index 3b2d3c78c..c3a7d83c6 100644 --- a/src/earthkit/data/targets/file_pattern.py +++ b/src/earthkit/data/targets/file_pattern.py @@ -88,16 +88,9 @@ def _f(self, data): def _convert(v): if v is None: return "None" - # return str(v) return v - # print("self.split_output", self.split_output) - # for k in self.split_output: - # print("split", k.split(":")) - # print("data", data.get(k.split(":")[0])) - keys = [_convert(data.get(k.split(":")[0])) for k in self.split_output] - # print("keys", keys) path = self.filename.format(*keys) if path not in self._files: From c035d4e76a04d1f4b909de506944475a9eb38d8e Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 Apr 2026 17:03:56 +0100 Subject: [PATCH 6/8] Disable grib access in modified fields --- .../how-tos/grib/grib_modify_metadata.ipynb | 6 +- docs/source/release-notes/migration_1.0.0.rst | 10 +- src/earthkit/data/core/field.py | 9 +- src/earthkit/data/encoders/grib.py | 65 ++++++++++++- tests/grib/test_grib_set.py | 97 ++++++++++++------- 5 files changed, 142 insertions(+), 45 deletions(-) diff --git a/docs/source/how-tos/grib/grib_modify_metadata.ipynb b/docs/source/how-tos/grib/grib_modify_metadata.ipynb index ac900ea3c..4c3ae7a06 
100644 --- a/docs/source/how-tos/grib/grib_modify_metadata.ipynb +++ b/docs/source/how-tos/grib/grib_modify_metadata.ipynb @@ -45,7 +45,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "404db52ddd70458886774e0b6edad7bf", + "model_id": "6bb5a5212f7742f6bd69894ec6a167b3", "version_major": 2, "version_minor": 0 }, @@ -915,7 +915,7 @@ "tags": [] }, "source": [ - "If we want to keep a valid associated GRIB message in the modified field we need to call :func:`~earthkit.data.core.field.Field.sync_raw_metadata`. This will create a new GRIB handle, update the relevant metadata in it and create a new field out of it." + "If we want to keep a valid associated GRIB message in the modified field we need to call :func:`~earthkit.data.core.field.Field.sync`. This will create a new GRIB handle, update the relevant metadata in it and create a new field out of it." ] }, { @@ -942,7 +942,7 @@ } ], "source": [ - "f1 = f1.sync_raw_metadata()\n", + "f1 = f1.sync()\n", "f1.get([\"metadata.shortName\", \"metadata.level\"])" ] }, diff --git a/docs/source/release-notes/migration_1.0.0.rst b/docs/source/release-notes/migration_1.0.0.rst index c9d5da2b4..c5110172c 100644 --- a/docs/source/release-notes/migration_1.0.0.rst +++ b/docs/source/release-notes/migration_1.0.0.rst @@ -129,6 +129,13 @@ Raw metadata keys are still available but they are only accessible either by usi f.metadata("shortName") f.metadata("metadata.shortName") +Field modification +++++++++++++++++++++++++ + +Fields can be modified using the :py:meth:`~earthkit.data.core.field.Field.set` method. This method allows setting new data values and/or changing metadata keys. 
See the notebook examples: + +- :ref:`/how-tos/grib/grib_modify_metadata.ipynb` +- :ref:`/how-tos/grib/grib_modify_values.ipynb` Field arithmetic ++++++++++++++++++++++++ @@ -319,8 +326,9 @@ The following table gives an overview of the changes in the Fieldlist API: Xarray engine ------------------ -The Xarray engine has been refactored and many of the internal classes and methods have been changed. The following table gives an overview of the changes in the Xarray engine: +The Xarray engine has been refactored and many of the internal classes and methods have been changed. The following list gives an overview of the changes in the Xarray engine: - a new default profile :ref:`earthkit ` has been added which is used when no profile is specified. This profile is designed to work with the new format independent metadata keys from :py:class:`~earthkit.data.core.field.Field` to generate the Xarray dataset. - the old :ref:`mars ` and :ref:`grib ` profiles were kept but they are now using some of the new format independent metadata keys to generate the Xarray dataset. - the "number" ``dim_role`` was renamed to "member" in line with the new format independent metadata keys. See: :ref:`xr_dim` for more details. +- the ``time_dim_mode`` kwarg in :func:`to_xarray` was replaced by ``time_dims`` and the meaning of some temporal dimensions in the ``dim_roles`` also changed. See :ref:`xr_time_dims` for more details. diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index ac0691368..092b7b7e0 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1468,7 +1468,7 @@ def _set_values(self, array): data = self._components[_DATA].set_values(array) return Field.from_field(self, data=data) - def sync_raw_metadata(self): + def sync(self): """Return a field with the raw metadata in sync with the field's components. 
When a field is created from a GRIB message, the field stores this associated GRIB message/handle @@ -1493,7 +1493,7 @@ def sync_raw_metadata(self): None >>> f1.metadata("shortName") KeyError: 'metadata.shortName' not found in field - >>> f2 = f1.sync_raw_metadata() + >>> f2 = f1.sync() >>> f2.get("metadata.shortName") 'msl' >>> f2.metadata("shortName") @@ -1503,7 +1503,10 @@ def sync_raw_metadata(self): from earthkit.data.encoders.grib import GribEncoder encoder = GribEncoder() - return encoder.encode(data=self).to_field() + f = encoder.encode(data=self).to_field() + if self.labels: + f = f.set(labels=self.labels) + return f return self def to_target(self, target, *args, **kwargs): diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index dfd2fcc83..f3e037380 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -369,9 +369,68 @@ def _gg_field(self, values, metadata): class GribEncoder(Encoder): - """Encode GRIB data.""" - - def __init__(self, **kwargs): + """Encoder for GRIB format. + + This class is used to encode data to GRIB format via the :meth:`encode` method. + + Parameters + ---------- + template: Field, GribCodesHandle, bytes, str, int, None + A preset template to use for encoding when :meth:`encode` is called without a template. + It can be a :py:class:`~earthkit.data.core.field.Field`, + a :py:class:`~earthkit.data.reader.grib.GribCodesHandle`, a GRIB message as + bytes, an ecCodes GRIB sample name as string, a raw ecCodes handle as an integer, or None. + See :meth:`encode` for more details on how the template is used. + metadata: dict + A preset metadata to encode. The keys must be ecCodes GRIB keys, optionally prefixed with "metadata.". + This metadata is used as default when :meth:`encode` is called without metadata. 
If metadata is provided + in the :meth:`encode` method, it is merged with this preset metadata, with the metadata provided + in the :meth:`encode` method taking precedence. + kwargs: dict + Additional keyword arguments interpreted as metadata to encode. The keys must be ecCodes GRIB keys, + optionally prefixed with "metadata.". + + Examples + -------- + See the howto examples for more details and examples of encoding GRIB data with :class:`GribEncoder`. + + - :ref:`/how-tos/target/grib_encoder.ipynb` + - :ref:`/how-tos/grib/grib_modify_metadata.ipynb` + - :ref:`/how-tos/grib/grib_modify_values.ipynb` + + Using with a preset template and metadata: + + >>> import earthkit.data as ekd + >>> template = ekd.from_source("sample", "test.grib").to_fieldlist()[0] + >>> template.get("metadata.shortName") + '2t' + >>> encoder = GribEncoder(template=template, metadata={"shortName": "msl"}) + >>> d = encoder.encode(values=template.values + 1.0, step=6) + >>> f = d.to_field() + >>> f.get("parameter.variable") + 'msl' + >>> f.get("parameter.shortName") + 'msl' + >>> f.get("parameter.units") + 'hPa' + >>> f.get("step") + 6 + + Using without preset template and metadata: + >>> encoder = GribEncoder() + >>> d = encoder.encode(values=template.values + 1.0, metadata={"shortName": "msl"}, step=6) + >>> f = d.to_field() + >>> f.get("parameter.variable") + 'msl' + >>> f.get("parameter.shortName") + 'msl' + >>> f.get("parameter.units") + 'hPa' + >>> f.get("step") + 6 + """ + + def __init__(self, template=None, metadata=None, **kwargs): super().__init__(**kwargs) self._bbox = {} # the template is stored as a handle to be used as a basis for encoding, diff --git a/tests/grib/test_grib_set.py b/tests/grib/test_grib_set.py index 092cc225d..ce373a190 100644 --- a/tests/grib/test_grib_set.py +++ b/tests/grib/test_grib_set.py @@ -20,20 +20,9 @@ @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) # @pytest.mark.parametrize("fl_type", ["file"]) -def test_grib_set_detailed(fl_type): 
+def test_grib_set_field_detailed_1(fl_type): ds_ori, _ = load_grib_data("test4.grib", fl_type) - # --------------- - # field - # --------------- - - # f = ds_ori[0].clone( - # param="q", - # levelist=_func1, - # mars_area=_func2, - # name=_func3, - # ) - f = ds_ori[0].set({ "parameter.variable": "q", "vertical.level": 600, @@ -50,20 +39,15 @@ def test_grib_set_detailed(fl_type): assert f.get("labels.my_shape") == (181, 360) assert f.get("labels.my_name") == "t_500" - # TODO: apply wrapped metadata to namespaces - # assert f.get(namespace="mars") == { - # "class": "ea", - # "date": 20070101, - # "domain": "g", - # "expver": "0001", - # "levelist": 500, - # "levtype": "pl", - # "param": "t", - # "step": 0, - # "stream": "oper", - # "time": 1200, - # "type": "an", - # } + f1 = f.sync() + assert f1.get("parameter.variable") == "q" + assert f1.get("metadata.shortName") == "q" + assert f1.get("vertical.level") == 600 + assert f1.get("metadata.levelist") == 600 + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "q") + assert f1.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f1.get("labels.my_shape") == (181, 360) + assert f1.get("labels.my_name") == "t_500" # write back to grib with temp_file() as tmp: @@ -83,9 +67,11 @@ def test_grib_set_detailed(fl_type): assert f_saved.get("vertical.level_type") == "pressure" assert f_saved.get("metadata.typeOfLevel") == "isobaricInhPa" - # --------------------- - # field - repeated use - # --------------------- + +@pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) +# @pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_field_detailed_2(fl_type): + ds_ori, _ = load_grib_data("test4.grib", fl_type) f = ds_ori[0].set({ "parameter.variable": "q", @@ -104,16 +90,27 @@ def test_grib_set_detailed(fl_type): assert f.get("vertical.level") == 800 assert f.get("metadata.level") is None assert f.get("metadata.levelist") is None - # TODO: this should be 800 - # assert 
f.metadata("levelist") == 700 assert f.get(("metadata.date", "parameter.variable")) == (None, "pt") assert f.get(("parameter.variable", "metadata.date")) == ("pt", None) - # assert np.allclose(np.array(f.metadata("mars_area")), np.array([90.0, 0.0, -90.0, 359.0])) assert f.get("labels.my_name") == "t_500" - # --------------- - # fieldlist - # --------------- + f1 = f.sync() + assert f1.get("parameter.variable") == "pt" + assert f1.get("metadata.shortName") == "pt" + assert f1.get("vertical.level") == 800 + assert f1.get("metadata.level") == 800 + assert f1.get("metadata.levelist") == 800 + assert f1.get("metadata.typeOfLevel") == "isobaricInhPa" + assert f1.get("vertical.level_type") == "pressure" + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "pt") + assert f1.get(("parameter.variable", "metadata.date")) == ("pt", 20070101) + assert f1.get("labels.my_name") == "t_500" + + +@pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) +# @pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_fieldlist_detailed(fl_type): + ds_ori, _ = load_grib_data("test4.grib", fl_type) fields = [] for i in range(2): @@ -419,3 +416,33 @@ def test_grib_set_no_args(fl_type): f = ds[0] r = f.set() assert r is f + + +@pytest.mark.parametrize("fl_type", ["file"]) +def test_grib_set_field_sync(fl_type): + ds, _ = load_grib_data("test4.grib", fl_type) + + f = ds[0] + f = f.set({ + "parameter.variable": "q", + "vertical.level": 600, + "labels.my_shape": (181, 360), + "labels.my_name": "t_500", + }) + + assert f.get("parameter.variable") == "q" + assert f.get("metadata.shortName") is None + assert f.get("vertical.level") == 600 + assert f.get("metadata.levelist") is None + assert f.get(("metadata.date", "parameter.variable")) == (None, "q") + assert f.get(("parameter.variable", "metadata.date")) == ("q", None) + + f1 = f.sync() + assert f1.get("parameter.variable") == "q" + assert f1.get("metadata.shortName") == "q" + assert f1.get("vertical.level") == 
600 + assert f1.get("metadata.levelist") == 600 + assert f1.get(("metadata.date", "parameter.variable")) == (20070101, "q") + assert f1.get(("parameter.variable", "metadata.date")) == ("q", 20070101) + assert f1.get("labels.my_shape") == (181, 360) + assert f1.get("labels.my_name") == "t_500" From 720021627307e033fbb306db1caa972c17e5755d Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 Apr 2026 20:43:27 +0100 Subject: [PATCH 7/8] Disable grib access in modified fields --- src/earthkit/data/encoders/grib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index f3e037380..2701fd1fc 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -431,7 +431,7 @@ class GribEncoder(Encoder): """ def __init__(self, template=None, metadata=None, **kwargs): - super().__init__(**kwargs) + super().__init__(template=template, metadata=metadata, **kwargs) self._bbox = {} # the template is stored as a handle to be used as a basis for encoding, # (when available) From 216b864856eb02ff9bafbd4c9610c70fb904935e Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 Apr 2026 21:29:08 +0100 Subject: [PATCH 8/8] Disable grib access in modified fields --- src/earthkit/data/core/field.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 092b7b7e0..9369b6dcd 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1367,10 +1367,12 @@ def set(self, *args, **kwargs): Notes ----- - When the field was created from a GRIB message, calling :meth:`set` does not modify the original - GRIB message and the new field is not linked to a GRIB message. In the new field - the GRIB message/handle will not be available and the GRIB specific keys in the raw metadata will not be - accessible. 
+ When the field is created from a GRIB message, calling :meth:`set` copies the associated + GRIB message into the new field without any modifications. Since it is now out of sync with the + new field's components, the new field will not provide access to any GRIB metadata + either via :meth:`get` or via :meth:`metadata`. Additionally, when calling + :meth:`message` on the new field, None is returned. (Use :meth:`sync` to synchronize the + associated GRIB message to the new field and expose the GRIB metadata keys again). >>> import earthkit.data as ekd >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() @@ -1385,8 +1387,8 @@ def set(self, *args, **kwargs): However, if only the labels or the values are set (the latter via the "data" or "values" keys), the new field returned by :meth:`set` is still linked to the original GRIB message and the GRIB specific keys - in the raw metadata are still accessible. If the values were modified, when calling :meth:`message` on the - new field, the original GRIB message updated with the modified data values is returned. + in the raw metadata are still accessible. If the values were modified, :meth:`message` will return + the original GRIB message updated with the modified data values. >>> import earthkit.data as ekd >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() @@ -1471,11 +1473,14 @@ def _set_values(self, array): def sync(self): """Return a field with the raw metadata in sync with the field's components. - When a field is created from a GRIB message, the field stores this associated GRIB message/handle - and the raw metadata is extracted from it. When the field's components are modified using :meth:`set`, - the raw metadata is not automatically updated to reflect the changes in the components and will - become hidden to the user. This method can be used to create a new field with the raw metadata updated - to be consistent with the current state of the field's components. 
+ When a field is created from a GRIB message, it stores this associated GRIB message/handle + and the raw GRIB metadata is extracted from it e.g. when calling :meth:`get`. When the field's + components are modified using :meth:`set`, the GRIB message is copied into the new field but not + modified. Since it is now out of sync with the new field's components, the new field will + not provide access to any GRIB metadata either via :meth:`get` or via :meth:`metadata`. When + :meth:`sync` is called on such a field the GRIB message is re-encoded using the field's components + and the raw GRIB metadata will become available again and in sync with the field's components. + Returns -------