From b736d6f452171ac7518bc0291f5a123c9f6bc5de Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 16 Apr 2026 18:19:53 +0100 Subject: [PATCH] Improve field setting code and docs --- .gitignore | 2 +- .../how-tos/grib/grib_modify_metadata.ipynb | 927 +++++++++++ docs/source/how-tos/grib/index.rst | 1 + docs/source/how-tos/target/file_target.ipynb | 268 ++++ docs/source/how-tos/target/grib_encoder.ipynb | 1371 +++++++++++++++++ .../how-tos/target/grib_to_fdb_target.ipynb | 486 ++++++ .../target/grib_to_file_pattern_target.ipynb | 428 +++++ .../how-tos/target/grib_to_file_target.ipynb | 461 ++++++ .../how-tos/target/grib_to_geotiff.ipynb | 302 ++++ .../how-tos/target/grib_to_zarr_target.ipynb | 786 ++++++++++ docs/source/how-tos/target/index.rst | 15 + .../release-notes/version_1.0.0rc_updates.rst | 6 + src/earthkit/data/core/field.py | 65 +- src/earthkit/data/core/fieldlist.py | 2 - src/earthkit/data/field/component/time.py | 6 +- src/earthkit/data/field/grib/time.py | 8 +- src/earthkit/data/indexing/indexed.py | 2 - 17 files changed, 5121 insertions(+), 15 deletions(-) create mode 100644 docs/source/how-tos/grib/grib_modify_metadata.ipynb create mode 100644 docs/source/how-tos/target/file_target.ipynb create mode 100644 docs/source/how-tos/target/grib_encoder.ipynb create mode 100644 docs/source/how-tos/target/grib_to_fdb_target.ipynb create mode 100644 docs/source/how-tos/target/grib_to_file_pattern_target.ipynb create mode 100644 docs/source/how-tos/target/grib_to_file_target.ipynb create mode 100644 docs/source/how-tos/target/grib_to_geotiff.ipynb create mode 100644 docs/source/how-tos/target/grib_to_zarr_target.ipynb create mode 100644 docs/source/how-tos/target/index.rst diff --git a/.gitignore b/.gitignore index 291486491..b9b874860 100644 --- a/.gitignore +++ b/.gitignore @@ -225,7 +225,7 @@ autodocs/ # PyBuilder .pybuilder/ -target/ + # Jupyter Notebook diff --git a/docs/source/how-tos/grib/grib_modify_metadata.ipynb 
b/docs/source/how-tos/grib/grib_modify_metadata.ipynb new file mode 100644 index 000000000..a6483e97d --- /dev/null +++ b/docs/source/how-tos/grib/grib_modify_metadata.ipynb @@ -0,0 +1,927 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5d21c14f-b851-4437-a1f1-1daac37a9e49", + "metadata": {}, + "source": [ + "# GRIB: modifying metadata" + ] + }, + { + "cell_type": "markdown", + "id": "ac126208-e64e-4658-b69a-83e9b387464b", + "metadata": {}, + "source": [ + "This notebook demonstrates how to modify the metadata in GRIB fields.\n", + "\n", + "First we read some GRIB data containing pressure level fields." + ] + }, + { + "cell_type": "markdown", + "id": "0edd73c7-9358-47cd-96b4-cf1e0d1cb720", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c11cc522-d388-4f39-a2d4-2f7b4c03517e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "01b91013e30e499da1ed373f75523982", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f = fl[0]\n", + "f.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "d3f4f433-35f4-40b0-a100-d405205dd1bc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Using set()" + ] + }, + { + "cell_type": "raw", + "id": "efd58137-2499-4929-86f0-211444c3a152", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "A field can be modified by using :py:meth:`~earthkit.data.core.field.Field.set`. It will create a new field with updated metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "784a62cc-4534-44e4-af1a-5475ab82afdb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"parameter.variable\": \"u\", \"parameter.units\": \"m/s\", \"vertical.level\": 500})\n", + "f1.ls()" + ] + }, + { + "cell_type": "raw", + "id": "296ef80e-510f-482c-bba3-99f353b5d4b5", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Only the field component metadata keys can be used in :py:meth:`~earthkit.data.core.field.Field.set`; raw metadata keys are not allowed. E.g. the field was created from GRIB data so it has the raw metadata key ``metadata.shortName`` but we cannot set it. If you need to change the GRIB metadata, see the \"Changing raw GRIB metadata\" section below."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "799fcb48-8860-442c-86dc-16054146ef19", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t\n", + "'Key metadata.shortName cannot be set on the field.'\n" + ] + } + ], + "source": [ + "print(f.get(\"metadata.shortName\"))\n", + "try:\n", + " f.set({\"metadata.shortName\": \"u\"})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "bbeceb32-7cde-462c-8af1-36c10302530c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Setting time" + ] + }, + { + "cell_type": "markdown", + "id": "fb770237-20f6-453f-9fd6-6bcedea06a0d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Setting keys for the time field component allows using multiple formats. By default a \"datetime\" key takes a datetime.datetime object and a \"step\" key takes a datetime.timedelta object." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "77201dea-07e3-4e44-93d6-3220f249ea83", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": datetime.datetime(2000, 12, 18, 12), \"time.step\": datetime.timedelta(hours=6)})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e95e5812-d436-4d1f-9f5d-1c0e962cfa82", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "On top of that, we can also use many compatible formats, e.g:\n", + " - for datetime: ISO date strings, numpy datetime64 values, integers as yyyymmdd (the hour is assumed to be 0 in this case)\n", + " - for timedelta: integers (as hours), strings like \"6s\", \"6m\", \"6h\" (for seconds, minutes or hours)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "30a2c42b-56b9-42a4-ac22-f78d0bfe6c44", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.base_datetime\": \"2000-12-18T12\", \"time.step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "82e0d38a-b766-46f3-b55f-68406dbb26d1", + "metadata": {}, + "source": [ + "Setting the step will automatically update the valid time too." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b384219d-0d52-4753-801e-39c6879754da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:102018-08-01 12:00:000 days 00:00:101000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:10 2018-08-01 12:00:00 0 days 00:00:10 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"time.step\": \"10s\"})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "cd3279a4-0a10-4ad7-abd1-864d1cabfe6c", + "metadata": {}, + "source": [ + "## Setting components" + ] + }, + { + "cell_type": "raw", + "id": "bcf1b39a-f7e3-4d62-8a52-92e8cc3aca8a", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "It is allowed to set individual field components with :py:meth:`~earthkit.data.core.field.Field.set`. The simplest way is to specify them as a dict. E.g. the following cell sets a new \"time\" component on the field." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e05d2d32-6e0a-41b7-bc6f-889b01856884", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2000-12-18 18:00:002000-12-18 12:00:000 days 06:00:001000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2000-12-18 18:00:00 2000-12-18 12:00:00 0 days 06:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set(time={\"base_datetime\": \"2000-12-18T12\", \"step\": 6})\n", + "f1.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "c43eb34e-a058-43bc-bd99-0b1b56ed0dd3", + "metadata": {}, + "source": [ + "If the dict does not fully specify the component an exception is raised. E.g. \"step\" on its own does not define a time component." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "758a4ee9-23a9-45ce-91d7-9af9d1c6a930", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot create ForecastTime from keys: ['step'].\n" + ] + } + ], + "source": [ + "try:\n", + " f.set(time={\"step\": 6})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "90ba2625-e203-4e09-a072-4dcdc438819b", + "metadata": {}, + "source": [ + "## Saving the modified field to disk" + ] + }, + { + "cell_type": "markdown", + "id": "4926dcde-b9ef-47b1-8c51-fb140e72a015", + "metadata": {}, + "source": [ + "We change the level and save the modified field into a GRIB file." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f01193d1-cfbb-42fb-8f73-b11ab0b1737f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1 = f.set({\"vertical.level\": 500})\n", + "f1.to_target(\"file\", \"_res_lev.grib\")\n", + "\n", + "# read back the data and compare the values in the first field\n", + "f1_w = ekd.from_source(\"file\", \"_res_lev.grib\").to_fieldlist()\n", + "f1_w.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "39666d1c-7db3-4e97-9cfc-55b3b8d71f7d", + "metadata": {}, + "source": [ + "## Changing raw GRIB metadata" + ] + }, + { + "cell_type": "raw", + "id": "89482515-0cf7-44ce-a267-c78e8bd0a6a6", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Currently, changing the (raw) GRIB metadata in a field requires the usage of a :py:class:`~earthkit.data.encoders.grib.GribEncoder`. \n", + "When we call its :py:meth:`~earthkit.data.encoders.grib.GribEncoder.encode` method it will clone the underlying GRIB message, set the GRIB metadata on it and return an object that can be converted to a field." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "82e780fe-3539-4af9-ad4f-585739577dcc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days1000pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 1000 pressure 0 regular_ll " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "encoder = ekd.create_encoder(\"grib\")\n", + "r = encoder.encode(template=f, metadata={\"shortName\": \"u\"})\n", + "f1 = r.to_field()\n", + "f1.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e16dda68-bb85-4d0f-af9c-b814450a81c7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/grib/index.rst b/docs/source/how-tos/grib/index.rst index 14e63f289..3021005d0 100644 --- a/docs/source/how-tos/grib/index.rst +++ b/docs/source/how-tos/grib/index.rst @@ -14,6 +14,7 @@ GRIB grib_order_by.ipynb grib_indexing.ipynb grib_modify_values.ipynb + grib_modify_metadata.ipynb grib_missing.ipynb grib_array_namespace.ipynb grib_nearest_gridpoint.ipynb diff --git a/docs/source/how-tos/target/file_target.ipynb b/docs/source/how-tos/target/file_target.ipynb new file mode 100644 index 000000000..35729d1aa --- /dev/null +++ b/docs/source/how-tos/target/file_target.ipynb @@ -0,0 +1,268 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b2c9066b-db07-4ebc-aebe-41691abad1c0", + "metadata": {}, + "source": [ + "# Writing to a file target" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": 
"c32a1f28-fd92-42df-ba34-5e5d410fa4ce", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1c80d16ac5244f1d8e5ce79f5b010743", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "test.grib: 0%| | 0.00/1.03k [00:00` is automatically guessed from the input data. Alternatively, we can create a target object with :func:`create_target` and directly write to it." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "216afc9e-c6b3-48c2-a62a-9aca346447df", + "metadata": {}, + "outputs": [], + "source": [ + "# calling to_target\n", + "ds.to_target(\"file\", \"_res_t_file_handler.grib\")\n", + "\n", + "# using write on the target object\n", + "with ekd.create_target(\"file\", \"_res_t_file_handler.grib\") as t:\n", + " t.write(ds)" + ] + }, + { + "cell_type": "markdown", + "id": "253b32b5-d6cd-41f7-96d4-5933931aec3a", + "metadata": {}, + "source": [ + "#### Using a file-like object" + ] + }, + { + "cell_type": "markdown", + "id": "41473048-e8a1-42b2-a39a-9cc7cf46617e", + "metadata": {}, + "source": [ + "A file-like object passed to the target is not closed, even when the target is closed or created with a context manager." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "90375bdd-2f72-40ad-a94c-4ab635acb54d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "fp = open(\"_res_t_file_handler.grib\", \"wb\")\n", + "ds.to_target(\"file\", fp)\n", + "\n", + "# the file object is still open\n", + "print(fp.closed)\n", + "\n", + "# we need to close it manually\n", + "fp.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "348834e3-0197-4c79-ba27-738855c4d2eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "fp = open(\"_res_t_file_handler.grib\", \"wb\")\n", + "\n", + "# the context manager will call close() on the target\n", + "with ekd.create_target(\"file\", fp) as t:\n", + " t.write(ds)\n", + "\n", + "# the file object is still open\n", + "print(fp.closed)\n", + "\n", + "# we need to close it manually\n", + "fp.close()" + ] + }, + { + "cell_type": "markdown", + "id": "278183b2-f502-4b24-9975-f4e704fc14a9", + "metadata": {}, + "source": [ + "The simplest solution to this problem is to use a context manager for the file-like object. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "63a34551-35d8-4262-8759-3e69fca05bc7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "with open(\"_res_t_file_handler.grib\", \"wb\") as fp:\n", + " ds.to_target(\"file\", fp)" + ] + }, + { + "cell_type": "markdown", + "id": "233e46d1-c052-405c-bbf8-9bad9f512497", + "metadata": {}, + "source": [ + "#### Appending to a file" + ] + }, + { + "cell_type": "markdown", + "id": "92fad9cb-f9f4-4e79-9357-b3d888504890", + "metadata": {}, + "source": [ + "When using a file path we can use the ``append=True`` option to append to the output." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "afe1de08-866a-4943-9918-4755f1e5ce68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# calling to_target\n", + "out_file = \"_res_t1_file_handler.grib\"\n", + "if os.path.isfile(out_file):\n", + " os.remove(out_file)\n", + "\n", + "ds[0].to_target(\"file\", out_file, append=True)\n", + "len(ekd.from_source(\"file\", out_file).to_fieldlist())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96583f35-48bd-4bc9-a4e6-811758d5ea06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[1].to_target(\"file\", out_file, append=True)\n", + "len(ekd.from_source(\"file\", out_file).to_fieldlist())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3709c465-551b-4369-9b03-61ea5f50e56f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/grib_encoder.ipynb b/docs/source/how-tos/target/grib_encoder.ipynb new file mode 100644 index 000000000..590d8e460 --- /dev/null +++ b/docs/source/how-tos/target/grib_encoder.ipynb @@ -0,0 +1,1371 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e5dd8e8d-437e-4ef4-9cf5-ef2a4cd09e81", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# GRIB encoder" + ] 
+ }, + { + "cell_type": "raw", + "id": "b7ff9549-beae-4ef9-8076-a7a73263d653", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "An :ref:`encoder ` is used to generate data in a suitable format that can be written/added to a :ref:`target `. Encoders are typically used implicitly via :func:`to_target` but we can also instantiate an object and work with it directly." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9d9760b5-fb39-4e0f-9f3f-e6bf2c15e1a2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "20bc3e5a-0e84-419f-8d3c-1887b2cd2cac", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a2c2ac767de74fdb8f218c7243f31b2c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "test.grib: 0%| | 0.00/1.03k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get some input GRIB data\n", + "ds = ekd.from_source(\"sample\", \"test.grib\").to_fieldlist()\n", + "ds[0].ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a375d5a3-da37-40ab-a851-92c40e3f4b32", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create a GribEncoder\n", + "encoder = ekd.create_encoder(\"grib\")\n", + "encoder" + ] + }, + { + "cell_type": "raw", + "id": "9815a232-4848-4950-a9ee-6e07d593c6fe", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The method to call is :meth:`GribEncoder.encode`. The ``template`` argument can be a GRIB field. In the example below :meth:`GribEncoder.encode` will simply create a copy (clone) of the underlying GRIB handle in the field." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "77a3548b-cac3-4749-bc0d-a0a2f02e20b1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = encoder.encode(template=ds[0])\n", + "r" + ] + }, + { + "cell_type": "markdown", + "id": "17bf5946-71b7-40c3-a122-78d4660c9b84", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The resulting object can be used in various ways." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e2b4d8df-1ab6-4c83-b65a-7ff510c09ec1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "b'GRIB\\x00\\x02\\x0e\\x01\\x00\\x00'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.to_bytes()[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8ae392c9-fa79-4ff6-9ef3-4f13bf69329f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f = r.to_field()\n", + "f.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8a9f93d9-d04e-48d9-9e7b-94851f270882", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"_res_encoded.grib\", \"wb\") as out:\n", + " r.to_file(out)" + ] + }, + { + "cell_type": "markdown", + "id": "9873e7b3-0bc2-4a7c-8bdc-3d16f9d89e67", + "metadata": {}, + "source": [ + "#### Specifying metadata" + ] + }, + { + "cell_type": "markdown", + "id": "e3db5691-9529-4a28-92a6-447189c24335", + "metadata": {}, + "source": [ + "When we specify GRIB metadata it will be written into the resulting GRIB fields." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ac1e1525-c9b9-497b-841d-17f830b1a400", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2021-05-14 12:00:002021-05-14 12:00:000 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2021-05-14 12:00:00 2021-05-14 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = encoder.encode(template=ds[0], metadata={\"date\": 20210514})\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "05028656-a641-4530-8bab-02b9ec97be76", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2021-05-14 12:00:002021-05-14 12:00:000 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2021-05-14 12:00:00 2021-05-14 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = encoder.encode(template=ds[0], date=20210514)\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "aaded4d0-0996-4064-8719-a480eb068409", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Specifying new values" + ] + }, + { + "cell_type": "markdown", + "id": "aa339c80-7bb8-46c0-b6a0-08b170741a10", + "metadata": {}, + "source": [ + "To replace the values in the resulting fields we need to use the ``values`` keyword argument." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "83a519ba-d744-488e-bbb1-cf4fff5a15db", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(np.float64(315.4599609375), np.float64(316.4599609375))" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vals = ds[0].values\n", + "r = encoder.encode(values=vals + 1, template=ds[0])\n", + "ds[0].values.max(), r.to_field().values.max()" + ] + }, + { + "cell_type": "markdown", + "id": "986e8d46-fad4-4d1c-9009-5b6e7d260010", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Specifying a field" + ] + }, + { + "cell_type": "markdown", + "id": "d6d3860a-c46c-4d30-8e2f-c517f0fb254d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When a field is specified as the ``data`` it is used as a 
template." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4b6c42c2-cf5c-4bf5-94a6-c5581e3cd665", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0msl2021-05-14 12:00:002021-05-14 12:00:000 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 msl 2021-05-14 12:00:00 2021-05-14 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = encoder.encode(data=ds[1], metadata={\"date\": 20210514})\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "667bdff9-d36a-4627-8849-38f17d3ff8bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When both ``data`` and ``template`` are specified, the values from the field in ``data`` will be copied into the GRIB message created from the template." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "05b95db7-f5b4-4c9f-8f44-b0a574bc5c7f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2021-05-14 12:00:002021-05-14 12:00:000 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2021-05-14 12:00:00 2021-05-14 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = encoder.encode(data=ds[1], template=ds[0], metadata={\"date\": 20210514})\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "77b56c59-b2ba-40bb-a6b0-18f94bef978b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We cannot use ``data``, ``values`` and ``template`` together." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a8337dbc-1838-4383-9d1c-c4a4763b661b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot provide data, values and template together\n" + ] + } + ], + "source": [ + "try:\n", + " r = encoder.encode(data=ds[1], template=ds[0], values=vals, metadata={\"date\": 20210514})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "5638a8cf-0cb7-4ac0-b50d-fbdff299fc80", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Encoding without a template" + ] + }, + { + "cell_type": "markdown", + "id": "41e869d3-1c30-41d2-b7d1-ff72a988c439", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "It is possible to encode GRIB data without providing a template using only values and metadata. This is an **experimental feature** and only works for certain metadata keys and the grid has to be either global lat-lon or reduced Gaussian grid. 
The geography is inferred from the shape of the specified values." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8951336c-382a-47be-8897-6df7c643c573", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2025-01-09 12:00:002025-01-08 12:00:001 days2height_above_ground_levelNoneregular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2025-01-09 12:00:00 2025-01-08 12:00:00 1 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member \\\n", + "0 2 height_above_ground_level None \n", + "\n", + " geography.grid_type \n", + "0 regular_ll " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# global 1x1 degree data\n", + "import numpy as np\n", + "\n", + "vals = np.random.normal(0, 1, (181, 360))\n", + "r = encoder.encode(values=vals, date=20250108, param=\"2t\", time=12, step=24, edition=2)\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "94683efb-0ce7-4deb-916b-f2fbae6ea2a6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(np.float64(4.1302121976172), np.float64(4.130106449127197))" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vals.max(), r.to_field().values.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cf755892-72b8-4560-8614-a623d0ca2fee", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2025-01-09 12:00:002025-01-08 12:00:001 days700pressureNoneregular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2025-01-09 12:00:00 2025-01-08 12:00:00 1 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 700 pressure None regular_ll " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# encode as a pressure level field\n", + "r = encoder.encode(values=vals, date=20250108, param=\"t\", level=700, levtype=\"pl\", time=12, step=24, edition=2)\n", + "r.to_field().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6ea3f635-7aee-4563-8e9a-c2e1c8cc851a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typemetadata.isOctahedralmetadata.N
02t2025-01-09 12:00:002025-01-08 12:00:001 days2height_above_ground_levelNonereduced_gg196
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2025-01-09 12:00:00 2025-01-08 12:00:00 1 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member \\\n", + "0 2 height_above_ground_level None \n", + "\n", + " geography.grid_type metadata.isOctahedral metadata.N \n", + "0 reduced_gg 1 96 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# global O96 data\n", + "vals = np.random.normal(0, 1, 40320)\n", + "r = encoder.encode(values=vals, date=20250108, param=\"2t\", time=12, step=24, edition=2)\n", + "r.to_field().ls(extra_keys=[\"metadata.isOctahedral\", \"metadata.N\"])" + ] + }, + { + "cell_type": "markdown", + "id": "5fdab838-4890-4e71-95d1-5b15af13f3a1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using preset options" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9dd4b17d-0601-45e3-9423-56ae73c06fd6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# create a GribEncoder with preset options\n", + "encoder = ekd.create_encoder(\"grib\", date=20250108, template=ds[0])\n", + "\n", + "d1 = encoder.encode(step=12)\n", + "d2 = encoder.encode(step=24)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "12247fcb-98e9-46b5-821c-0660e5dfb788", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2025-01-092025-01-08 12:00:000 days 12:00:000surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2025-01-09 2025-01-08 12:00:00 0 days 12:00:00 \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.to_field().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "4457ac71-cfe3-40ea-8be1-298ace04ec58", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2025-01-09 12:00:002025-01-08 12:00:001 days0surface0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2025-01-09 12:00:00 2025-01-08 12:00:00 1 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d2.to_field().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "12871d90-a4ab-4da7-8ecc-87a286c0de72", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Working with fieldlists" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "1c1bfb89-f504-4d6b-8402-aa05c426b47f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " metadata.shortName metadata.step\n", + "0 2t 18\n", + " metadata.shortName metadata.step\n", + "0 msl 18\n" + ] + } + ], + "source": [ + "# create a GribEncoder\n", + "encoder = ekd.create_encoder(\"grib\")\n", + "\n", + "# encode now returns a generator\n", + "for d in encoder.encode(data=ds, step=18):\n", + " print(d.to_field().ls(keys=[\"metadata.shortName\", \"metadata.step\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd24efbb-9c27-47a0-870e-43e1e21b5cc3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/docs/source/how-tos/target/grib_to_fdb_target.ipynb b/docs/source/how-tos/target/grib_to_fdb_target.ipynb new file mode 100644 index 000000000..0d934f178 --- /dev/null +++ b/docs/source/how-tos/target/grib_to_fdb_target.ipynb @@ -0,0 +1,486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50c3f565-f9a8-4100-a977-f9d3818853da", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Writing GRIB to an FDB target" + ] + }, + { + "cell_type": "markdown", + "id": "d4878bcc-d223-46ba-8d1d-183b60e6085b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This example demonstrates how to **write earthkit-data GRIB fields into an FDB**. \n", + "\n", + "FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for storing, indexing and retrieving GRIB data. For more information on FDB please consult the following pages:\n", + "\n", + "- [FDB](https://fields-database.readthedocs.io/en/latest/)\n", + "- [pyfdb](https://pyfdb.readthedocs.io/en/latest/)" + ] + }, + { + "cell_type": "markdown", + "id": "5fc7ae99-bd9d-4e7b-b3e5-73f12e15d047", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Setting up the target FDB" + ] + }, + { + "cell_type": "markdown", + "id": "fbbaf36f-a912-41e2-88db-3964e363fd3f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "In this example we will create an FDB in the current folder using the schema taken from the pyfdb test suite. To do so first we need to ensure the directory exists. Next, we have to specify the configuration."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e3f1ece1-a079-4b72-bafe-4c5f92a1bd26", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "fdb_schema = \"../default_fdb_schema\"\n", + "fdb_dir = \"./_fdb_target_demo\"\n", + "os.makedirs(fdb_dir, exist_ok=True)\n", + "\n", + "config = {\n", + " \"type\": \"local\",\n", + " \"engine\": \"toc\",\n", + " \"schema\": fdb_schema,\n", + " \"spaces\": [{\"handler\": \"Default\", \"roots\": [{\"path\": fdb_dir}]}],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "a319a355-ce2e-4c64-9d75-c786828049bf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Working with FDB requires pyfdb and fdb to be installed. The path to the fdb installation should also be set e.g. via the FDB5_DIR environment variable." + ] + }, + { + "cell_type": "markdown", + "id": "3a9d9260-ae51-4646-bb7e-8cb0120645fd", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Getting the input data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8a7930df-cf89-4be4-a25e-bdb7f362352d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0e1c1b34b269405b95cd245d585dfd74", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
1v2018-08-01 12:00:002018-08-01 12:00:000 days500pressure0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "1 v 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll \n", + "1 500 pressure 0 regular_ll " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking the result\n", + "request = {\n", + " \"class\": \"od\",\n", + " \"expver\": \"0001\",\n", + " \"stream\": \"oper\",\n", + " \"date\": \"20180801\",\n", + " \"time\": 1200,\n", + " \"domain\": \"g\",\n", + " \"type\": \"an\",\n", + " \"levtype\": \"pl\",\n", + " \"levelist\": 500,\n", + " \"step\": 0,\n", + " \"param\": [131, 132],\n", + "}\n", + "\n", + "ekd.from_source(\"fdb\", request, config=config, stream=False).to_fieldlist().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0d115cb7-c189-4b9d-a945-ad4eb8c6f052", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# setting GRIB keys for the output\n", + "ds.to_target(\"fdb\", config=config, metadata={\"date\": 20250108})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "59d005bc-a100-41a3-8cc6-f7c5c02950ec", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0u2025-01-08 12:00:002025-01-08 12:00:000 days500pressure0regular_ll
1v2025-01-08 12:00:002025-01-08 12:00:000 days500pressure0regular_ll
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 u 2025-01-08 12:00:00 2025-01-08 12:00:00 0 days \n", + "1 v 2025-01-08 12:00:00 2025-01-08 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 500 pressure 0 regular_ll \n", + "1 500 pressure 0 regular_ll " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking the result\n", + "request_1 = dict(**request)\n", + "request_1.update({\"date\": 20250108})\n", + "ekd.from_source(\"fdb\", request_1, config=config, stream=False).to_fieldlist().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "c51966f3-db61-43cf-8c46-43989dc61d0a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using a Target object" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0916438e-ed09-47b7-9618-75e47f3f1589", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# basic usage\n", + "target = ekd.create_target(\"fdb\", config=config)\n", + "target.write(ds)\n", + "target.flush()\n", + "\n", + "# can be used as a context manager, no need to call flush() in the end\n", + "with ekd.create_target(\"fdb\", config=config) as target:\n", + " target.write(ds)\n", + "\n", + "# a fieldlist can be written field by field into the target\n", + "with ekd.create_target(\"fdb\", config=config) as target:\n", + " for f in ds:\n", + " target.write(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a30bab1e-974d-4313-8358-8c83ccc76509", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb new file mode 100644 index 000000000..df08a0c5b --- /dev/null +++ b/docs/source/how-tos/target/grib_to_file_pattern_target.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6094679c-dc47-4822-bae4-150f6b541bd4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Writing GRIB to file-pattern target" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b7db8543-3a37-4eaf-8729-261f46e3ca18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d1c0f177bba543729b8ec1b4a7ceaeff", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tuv_pl.grib: 0%| | 0.00/4.22k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
0t2018-08-01 12:00:002018-08-01 12:00:000 days850pressure0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2018-08-01 12:00:00 2018-08-01 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 850 pressure 0 regular_ll " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# writing only temperature fields\n", + "ds.sel({\"parameter.variable\": \"t\"}).to_target(\"file-pattern\", out_pattern)\n", + "\n", + "# writing a whole fieldlist\n", + "ds.to_target(\"file-pattern\", out_pattern)\n", + "\n", + "# checking one output file\n", + "ekd.from_source(\"file\", \"_res_pattern_t_850.grib\").to_fieldlist().ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "00bf762d-c937-4967-bf75-30d277c93182", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typemetadata.bitsPerValue
0t2025-01-08 12:00:002025-01-08 12:00:000 days850pressure0regular_ll8
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 t 2025-01-08 12:00:00 2025-01-08 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 850 pressure 0 regular_ll \n", + "\n", + " metadata.bitsPerValue \n", + "0 8 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# setting GRIB keys for the output\n", + "ds.to_target(\"file-pattern\", out_pattern, metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "# checking one output file\n", + "ekd.from_source(\"file\", \"_res_pattern_t_850.grib\").to_fieldlist().ls(extra_keys=[\"metadata.bitsPerValue\"])" + ] + }, + { + "cell_type": "markdown", + "id": "211ac929-3464-4165-a2a3-d8fb4903a7e3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using a Target object" + ] + }, + { + "cell_type": "raw", + "id": "c6fbbe17-99c6-49ce-9820-8fbd6391448d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We can create a target object with :func:`create_target` and use :meth:`write` to write/add data to it."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "47cef925-6caf-4cee-bec0-e4467db510f0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# basic usage\n", + "target = ekd.create_target(\"file-pattern\", out_pattern)\n", + "target.write(ds)\n", + "target.close()\n", + "\n", + "# can be used as a context manager, no need to call close() in the end\n", + "with ekd.create_target(\"file-pattern\", out_pattern) as target:\n", + " target.write(ds)\n", + "\n", + "# a fieldlist can be written field by field into the target\n", + "with ekd.create_target(\"file-pattern\", out_pattern) as target:\n", + " for f in ds:\n", + " target.write(f)" + ] + }, + { + "cell_type": "markdown", + "id": "5c4ce913-461a-4d3b-89ae-7feace1157a9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using encoders" + ] + }, + { + "cell_type": "markdown", + "id": "2aa6644d-1c90-4550-a88f-af2ee0d6b909", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "These calls are equivalent." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a86840f7-4b99-4ab6-911d-60726d771a31", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# setting metadata and other GRIB keys for the output\n", + "ds.to_target(\"file-pattern\", out_pattern, metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "# explicitly specifying the encoder\n", + "ds.to_target(\"file-pattern\", out_pattern, encoder=\"grib\", metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "# using an encoder object\n", + "encoder = ekd.create_encoder(\"grib\", metadata={\"date\": 20250108})\n", + "ds.to_target(\"file-pattern\", out_pattern, encoder=encoder, bitsPerValue=8)" + ] + }, + { + "cell_type": "markdown", + "id": "176b71c1-9830-4f4e-af75-6857467308bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The same can be done with a target object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "310476ce-9d73-4624-a933-2df468586a23", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "encoder = ekd.create_encoder(\"grib\", metadata={\"date\": 20250108})\n", + "with ekd.create_target(\"file-pattern\", out_pattern) as target:\n", + " target.write(ds, encoder=encoder, bitsPerValue=8)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/grib_to_file_target.ipynb b/docs/source/how-tos/target/grib_to_file_target.ipynb new file mode 100644 index 000000000..8264b272b --- /dev/null +++ b/docs/source/how-tos/target/grib_to_file_target.ipynb @@ -0,0 +1,461 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6094679c-dc47-4822-bae4-150f6b541bd4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Writing GRIB to a file target" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b7db8543-3a37-4eaf-8729-261f46e3ca18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "37999b81f3cf4e33a49aea96cc3d2d5a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "test.grib: 0%| | 0.00/1.03k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typemetadata.bitsPerValue
02t2025-01-08 12:00:002025-01-08 12:00:000 days0surface0regular_ll8
1msl2025-01-08 12:00:002025-01-08 12:00:000 days0surface0regular_ll8
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2025-01-08 12:00:00 2025-01-08 12:00:00 0 days \n", + "1 msl 2025-01-08 12:00:00 2025-01-08 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 0 surface 0 regular_ll \n", + "1 0 surface 0 regular_ll \n", + "\n", + " metadata.bitsPerValue \n", + "0 8 \n", + "1 8 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# setting GRIB keys for the output\n", + "ds.to_target(\"file\", \"_res_t.grib\", metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "ekd.from_source(\"file\", \"_res_t.grib\").to_fieldlist().ls(extra_keys=[\"metadata.bitsPerValue\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9002ac52-f9da-45fc-9ef1-0296daa675b8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_typemetadata.bitsPerValue
0msl2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll16
12t2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll16
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 msl 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "1 2t 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \\\n", + "0 0 surface 0 regular_ll \n", + "1 0 surface 0 regular_ll \n", + "\n", + " metadata.bitsPerValue \n", + "0 16 \n", + "1 16 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# data can be appended to the output file\n", + "ds[1].to_target(\"file\", \"_res_t.grib\")\n", + "ds[0].to_target(\"file\", \"_res_t.grib\", append=True)\n", + "ekd.from_source(\"file\", \"_res_t.grib\").to_fieldlist().ls(extra_keys=[\"metadata.bitsPerValue\"])" + ] + }, + { + "cell_type": "markdown", + "id": "211ac929-3464-4165-a2a3-d8fb4903a7e3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using a Target object" + ] + }, + { + "cell_type": "raw", + "id": "c6fbbe17-99c6-49ce-9820-8fbd6391448d", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We can create a target object with :func:`get_target` and use :meth:`write` to write/add data to it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "47cef925-6caf-4cee-bec0-e4467db510f0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# basic usage\n", + "target = ekd.create_target(\"file\", \"_res_t.grib\")\n", + "target.write(ds)\n", + "target.close()\n", + "\n", + "# can be used as a context manager, no need to call close() at the end\n", + "with ekd.create_target(\"file\", \"_res_t.grib\") as target:\n", + " target.write(ds)\n", + "\n", + "# a fieldlist can be written field by field into the target\n", + "with ekd.create_target(\"file\", \"_res_t.grib\") as target:\n", + " for f in ds:\n", + " target.write(f)" + ] + }, + { + "cell_type": "markdown", + "id": "5c4ce913-461a-4d3b-89ae-7feace1157a9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### Using encoders" + ] + }, + { + "cell_type": "markdown", + "id": "2aa6644d-1c90-4550-a88f-af2ee0d6b909", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "These calls are equivalent." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a86840f7-4b99-4ab6-911d-60726d771a31", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# setting metadata and other GRIB keys for the output\n", + "ds.to_target(\"file\", \"_res_t.grib\", metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "# explicitly specifying the encoder\n", + "ds.to_target(\"file\", \"_res_t.grib\", encoder=\"grib\", metadata={\"date\": 20250108}, bitsPerValue=8)\n", + "\n", + "# using an encoder object\n", + "encoder = ekd.create_encoder(\"grib\", metadata={\"date\": 20250108})\n", + "ds.to_target(\"file\", \"_res_t.grib\", encoder=encoder, bitsPerValue=8)" + ] + }, + { + "cell_type": "markdown", + "id": "176b71c1-9830-4f4e-af75-6857467308bb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The same can be done with a target object." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "310476ce-9d73-4624-a933-2df468586a23", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "encoder = ekd.create_encoder(\"grib\", metadata={\"date\": 20250108})\n", + "with ekd.create_target(\"file\", \"_res_t.grib\") as target:\n", + " target.write(ds, encoder=encoder, bitsPerValue=8)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/grib_to_geotiff.ipynb b/docs/source/how-tos/target/grib_to_geotiff.ipynb new file mode 100644 index 
000000000..f663d7693 --- /dev/null +++ b/docs/source/how-tos/target/grib_to_geotiff.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bf7d8446-8a4a-4cef-b286-19ade2f966cd", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Converting GRIB to GeoTIFF" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b22378eb-dde1-41fc-ace3-89c2fa729fcb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3c41d3816235476db3435762899b9775", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "test.grib: 0%| | 0.00/1.03k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02t2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll
1msl2020-05-13 12:00:002020-05-13 12:00:000 days0surface0regular_ll
\n", + "" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2t 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "1 msl 2020-05-13 12:00:00 2020-05-13 12:00:00 0 days \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 0 surface 0 regular_ll \n", + "1 0 surface 0 regular_ll " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "\n", + "ds = ekd.from_source(\"sample\", \"test.grib\")\n", + "ds.to_fieldlist().ls()" + ] + }, + { + "cell_type": "raw", + "id": "13ebbd7c-9a81-47eb-917c-97b498bd0c71", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We use :func:`to_target` to write the GRIB fieldlist/field into a file. The encoder is automatically guessed from the target file suffix." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4631e8fd-f957-4825-ab5b-6da5724cbfcf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "ds.to_target(\"file\", \"_test.tiff\")" + ] + }, + { + "cell_type": "markdown", + "id": "b1aa1482-d994-486c-8c68-be58256a24ff", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Check the resulting GeoTIFF file." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c8248004-84d8-491e-b1c1-24f14d2006b6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameter.variabletime.valid_datetimetime.base_datetimetime.stepvertical.levelvertical.level_typeensemble.membergeography.grid_type
02 metre temperatureNoneNoneNoneNoneunknownNoneNone
1Mean sea level pressureNoneNoneNoneNoneunknownNoneNone
\n", + "
" + ], + "text/plain": [ + " parameter.variable time.valid_datetime time.base_datetime time.step \\\n", + "0 2 metre temperature None None None \n", + "1 Mean sea level pressure None None None \n", + "\n", + " vertical.level vertical.level_type ensemble.member geography.grid_type \n", + "0 None unknown None None \n", + "1 None unknown None None " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds1 = ekd.from_source(\"file\", \"_test.tiff\")\n", + "ds1.to_fieldlist().ls()" + ] + }, + { + "cell_type": "markdown", + "id": "e64156b5-2df7-4db6-8bfd-31a2382077bc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Please note that to generate GeoTIFF the GRIB data is converted into Xarray internally. Right now the GeoTIFF output can only be generated if all the DataArrays in the Xarray are 2D." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24ae9be7-8222-4d8d-91c8-657d1b003b5d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/grib_to_zarr_target.ipynb b/docs/source/how-tos/target/grib_to_zarr_target.ipynb new file mode 100644 index 000000000..243e07af5 --- /dev/null +++ b/docs/source/how-tos/target/grib_to_zarr_target.ipynb @@ -0,0 +1,786 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8e308cd3-7f5a-4b62-bd2d-027850282c00", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Writing GRIB data to Zarr" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 1, + "id": "62b00621-67cd-46b0-81ef-16278a6eee18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1f3c6a0e84fb4853a95b166dd03a3105", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "pl.grib: 0%| | 0.00/48.8k [00:00Name/tTypezarr.core.ArrayData typefloat64Shape(4, 2, 2, 19, 36)Chunk shape(1, 1, 1, 19, 36)OrderCRead-onlyFalseCompressorBlosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)Store typezarr.storage.DirectoryStoreNo. bytes87552 (85.5K)No. bytes stored26897 (26.3K)Storage ratio3.3Chunks initialized16/16" + ], + "text/plain": [ + "Name : /t\n", + "Type : zarr.core.Array\n", + "Data type : float64\n", + "Shape : (4, 2, 2, 19, 36)\n", + "Chunk shape : (1, 1, 1, 19, 36)\n", + "Order : C\n", + "Read-only : False\n", + "Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)\n", + "Store type : zarr.storage.DirectoryStore\n", + "No. bytes : 87552 (85.5K)\n", + "No. bytes stored : 26897 (26.3K)\n", + "Storage ratio : 3.3\n", + "Chunks initialized : 16/16" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root[\"t\"].info" + ] + }, + { + "cell_type": "markdown", + "id": "ef19bc33-fcc7-4b5a-83b0-ee59da4179f0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The zarr store can be loaded to Xarray to check its content." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "706d4467-64b8-46bf-894d-346088208fa2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 176kB\n",
+       "Dimensions:                  (forecast_reference_time: 4, step: 2, level: 2,\n",
+       "                              latitude: 19, longitude: 36)\n",
+       "Coordinates:\n",
+       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
+       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
+       "  * level                    (level) int64 16B 500 700\n",
+       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
+       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
+       "Data variables:\n",
+       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 176kB\n", + "Dimensions: (forecast_reference_time: 4, step: 2, level: 2,\n", + " latitude: 19, longitude: 36)\n", + "Coordinates:\n", + " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", + " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", + " * level (level) int64 16B 500 700\n", + " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", + " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", + "Data variables:\n", + " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", + "Attributes:\n", + " Conventions: CF-1.8\n", + " institution: ECMWF" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import xarray\n", + "\n", + "xarray.open_dataset(\"_pl.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8479a12e-e907-43de-a3a6-aefb8cbfa754", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/how-tos/target/index.rst b/docs/source/how-tos/target/index.rst new file mode 100644 index 000000000..3fe4ad555 --- /dev/null +++ b/docs/source/how-tos/target/index.rst @@ -0,0 +1,15 @@ +.. _target_examples: + +Targets and encoders ++++++++++++++++++++++ + +.. 
toctree:: + :maxdepth: 1 + + file_target.ipynb + grib_to_file_target.ipynb + grib_to_file_pattern_target.ipynb + grib_to_fdb_target.ipynb + grib_to_geotiff.ipynb + grib_to_zarr_target.ipynb + grib_encoder.ipynb diff --git a/docs/source/release-notes/version_1.0.0rc_updates.rst b/docs/source/release-notes/version_1.0.0rc_updates.rst index 5f8ad0c24..2def15a3d 100644 --- a/docs/source/release-notes/version_1.0.0rc_updates.rst +++ b/docs/source/release-notes/version_1.0.0rc_updates.rst @@ -2,6 +2,12 @@ Version 1.0.0 Release Candidate Updates /////////////////////////////////////// +Version 1.0.0rc3 +================== + +- Fixed issue when setting time on a Field failed + + Version 1.0.0rc2 ================== diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 402dfc97b..e7208a161 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1261,16 +1261,75 @@ def set(self, *args, **kwargs): *args: tuple Positional arguments used to specify the metadata keys and values to set. Each argument can be a dict with keys and values to set. When multiple dicts are given - they are merged together with the latter dicts taking precedence over the former ones. - **kwargs: dict, optional + they are merged together with the latter dicts taking precedence over the former + ones. + + >>> field.set({"parameter.variable": "t"}) + >>> field.set({"parameter.variable": "t", "vertical.level": 1000}) + + New data values can be set by using the "data" or "values" key with the new values + as a value. For example, + + >>> field.set(data=new_values_array) + + will replace the data values in the field with the values in ``new_values_array``. + + Only high-level metadata keys (and "data" or "values") are allowed here, i.e. keys that + belong to a component. Modifying raw metadata keys is not allowed and we cannot use them + in :meth:`set` with or without the "metadata." prefix. 
For example, although + in fields generated from GRIB we can use the "metadata.shortName" key in the :meth:`get` + method to access the "shortName" key we cannot use it in :meth:`set`. + + Entire components can be set by using the component name as a key and the component + object or the equivalent dict as a value. For example, + + >>> field.set(parameter={"variable": "t", "units": "K"}) + + will replace the entire parameter component. + + Date and time related keys from the "time" field component can take + different formats of date/time/duration values as input. For example, when + setting by "time.base_datetime" the following calls are equivalent: + + >>> field.set({ "time.base_datetime": "2018-08-01T12"}) + >>> field.set({ "time.base_datetime": datetime(2018, 8, 1, 12, 0) }) + + Similarly, when setting "time.step" the following calls are equivalent: + + >>> field.set({ "time.step": "6h"}) + >>> field.set({ "time.step": 6}) + >>> field.set({ "time.step": "360m"}) + >>> field.set({ "time.step": timedelta(hours=6)}) + + Values are assumed to be in hours when the unit is not specified. When the unit is specified + it can be either "h", "m" or "s" for hours, minutes or seconds, respectively. + + **kwargs: dict Keyword arguments used to specify the metadata keys and values to set. They take - precedence over the positional arguments. + precedence over the positional arguments. The same rules for the keys and values + as for the positional arguments apply here. Returns ------- Field A new field with the specified metadata keys set to the given values. 
+ Examples + -------- + See the how-to examples for the :meth:`set` method in the following notebooks: + + - :ref:`/how-tos/grib/grib_modify_metadata.ipynb` + - :ref:`/how-tos/grib/grib_modify_values.ipynb` + + Further examples: + + >>> import earthkit.data as ekd + >>> fl = ekd.from_source("sample", "test.grib").to_fieldlist() + >>> f = fl[0] + >>> f2 = f.set({"parameter.variable": "10t", "parameter.units": "K"}) + >>> f2.get(["parameter.variable", "parameter.units"]) + ['10t', 'K'] + """ kwargs = kwargs.copy() for a in args: diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index 6f8be7ffa..fd35a21d1 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -673,8 +673,6 @@ def sel(self, *args, remapping=None, **kwargs) -> "FieldList": For example, when filtering by "time.valid_datetime" the following calls are equivalent: >>> fl.sel({ "time.valid_datetime": "2018-08-01T12:00:00"}) - >>> fl.sel({ "time.valid_datetime": "2018080112"}) - >>> fl.sel({ "time.valid_datetime": 2018080112}) >>> fl.sel({ "time.valid_datetime": datetime(2018, 8, 1, 12, 0) }) Similarly, when filtering by "time.step" the following calls are equivalent (values are assumed diff --git a/src/earthkit/data/field/component/time.py b/src/earthkit/data/field/component/time.py index 12f133ab9..6a402c4b6 100644 --- a/src/earthkit/data/field/component/time.py +++ b/src/earthkit/data/field/component/time.py @@ -503,7 +503,7 @@ def from_dict(cls, d): data = method(**d1) return data - raise ValueError(f"Invalid keys in data: {list(d.keys())}. Expected one of {KEYS}.") + raise ValueError(f"Cannot create ForecastTime from keys: {list(d.keys())}.") def set(self, *args, **kwargs): """Create a new instance with updated data. @@ -579,7 +579,7 @@ def set(self, *args, **kwargs): data = method(**d) return data - raise ValueError(f"Invalid keys in data: {list(d.keys())}. 
Allowed keys: {KEYS}.") + raise ValueError(f"Cannot create ForecastTime from keys: {list(d.keys())}.") def _set_generic( self, @@ -1046,7 +1046,7 @@ def from_dict(cls, d): data = method(**d1) return data - raise ValueError(f"Invalid keys in data: {list(d.keys())}. Expected one of {KEYS}.") + raise ValueError(f"Cannot create MonthlyForecastTime from keys: {list(d.keys())}.") def set(self, *args, **kwargs): """Create a new instance with updated data. diff --git a/src/earthkit/data/field/grib/time.py b/src/earthkit/data/field/grib/time.py index b175bd790..26f12183f 100644 --- a/src/earthkit/data/field/grib/time.py +++ b/src/earthkit/data/field/grib/time.py @@ -77,10 +77,10 @@ def collect_keys(handler, context): r["time"] = time r["step"] = step - if component.forecast_month is not None: - r["forecastMonth"] = component.forecast_month - if component.indexing_datetime is not None: - idate, itime = datetime_to_grib(component.indexing_datetime) + if component.forecast_month() is not None: + r["forecastMonth"] = component.forecast_month() + if component.indexing_datetime() is not None: + idate, itime = datetime_to_grib(component.indexing_datetime()) r["indexingDate"] = idate r["indexingTime"] = itime diff --git a/src/earthkit/data/indexing/indexed.py b/src/earthkit/data/indexing/indexed.py index a19a8a20d..27da6b04d 100644 --- a/src/earthkit/data/indexing/indexed.py +++ b/src/earthkit/data/indexing/indexed.py @@ -117,8 +117,6 @@ def sel(self, *args, remapping=None, **kwargs) -> "FieldList": For example, when filtering by "time.valid_datetime" the following calls are equivalent: >>> fl.sel({ "time.valid_datetime": "2018-08-01T12:00:00"}) - >>> fl.sel({ "time.valid_datetime": "2018080112"}) - >>> fl.sel({ "time.valid_datetime": 2018080112}) >>> fl.sel({ "time.valid_datetime": datetime(2018, 8, 1, 12, 0) }) Similarly, when filtering by "time.step" the following calls are equivalent (values are assumed