diff --git a/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb new file mode 100644 index 000000000..716627916 --- /dev/null +++ b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb @@ -0,0 +1,1366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d1a00c3-d3ae-48de-98ae-74e61f59c648", + "metadata": {}, + "source": [ + "# Testing earthkit-data xarray engine on single level ERA5 data from CDS" + ] + }, + { + "cell_type": "markdown", + "id": "199144cc-ac0a-423f-bd73-c7bff49a01f5", + "metadata": {}, + "source": [ + "To run this notebook, install this version of the earthkit-data package:\n", + "https://github.com/ecmwf/earthkit-data/tree/feature/improve-xr-splitter" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6e74f84a-6a55-4549-b80f-1ec65fd90aa7", + "metadata": {}, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "abadacb8-0062-4dde-be20-d2cb52964d15", + "metadata": {}, + "source": [ + "Load a dataset containing 12k+ GRIB messages of single-level ERA5 data from CDS (with degraded spatial resolution)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8584b68f-07f0-4ce0-918f-e3493a5825c2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + } + ], + "source": [ + "fl = ekd.from_source('url', 'https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/xr_engine/cds-reanalysis-era5-single-levels-20230101-low-resol.grib')" + ] + }, + { + "cell_type": "markdown", + "id": "1a615dcf-e31c-4770-b5ac-9606cf64938e", + "metadata": {}, + "source": [ + "Explore the content of the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0d85e0b-3eb6-4c95-9114-ba78d4be9e37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'edition': (1, 2),\n", + " 'stream': ('oper', 'wave', 'ewda', 
'enda'),\n", + " 'dataType': ('an', 'fc', 'em', 'es'),\n", + " 'stepType': ('instant', 'accum', 'max', 'avg'),\n", + " 'gridType': ('regular_ll',),\n", + " 'Ni': (36, 18, 12)}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl.unique_values('edition', 'stream', 'dataType', 'stepType', 'edition', 'gridType', 'Ni')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ebd189c1-897d-49e0-bcf7-247b644a4cf7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType...stepTypestepNiNjvalidityDatevalidityTimemd5GridSectionbitmapPresentgridSpecedition
0ecmf10usurface02023010100an0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
1ecmf10vsurface02023010100an0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
2ecmf2dsurface02023010100an0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
3ecmf2tsurface02023010100an0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
4ecmfmslsurface02023010100an0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
..................................................................
12611ecmfswvl1depthBelowLandLayer02023010121000es0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
12612ecmfswvl2depthBelowLandLayer72023010121000es0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
12613ecmfswvl3depthBelowLandLayer282023010121000es0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
12614ecmfswvl4depthBelowLandLayer1002023010121000es0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
12615ecmfdeg0lsurface02023010118003es0regular_ll...instant31892023010121003d13e67882e20f1c127f846bdc4725640None1
\n", + "

12616 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime \\\n", + "0 ecmf 10u surface 0 20230101 0 \n", + "1 ecmf 10v surface 0 20230101 0 \n", + "2 ecmf 2d surface 0 20230101 0 \n", + "3 ecmf 2t surface 0 20230101 0 \n", + "4 ecmf msl surface 0 20230101 0 \n", + "... ... ... ... ... ... ... \n", + "12611 ecmf swvl1 depthBelowLandLayer 0 20230101 2100 \n", + "12612 ecmf swvl2 depthBelowLandLayer 7 20230101 2100 \n", + "12613 ecmf swvl3 depthBelowLandLayer 28 20230101 2100 \n", + "12614 ecmf swvl4 depthBelowLandLayer 100 20230101 2100 \n", + "12615 ecmf deg0l surface 0 20230101 1800 \n", + "\n", + " stepRange dataType number gridType ... stepType step Ni Nj \\\n", + "0 0 an 0 regular_ll ... instant 0 36 19 \n", + "1 0 an 0 regular_ll ... instant 0 36 19 \n", + "2 0 an 0 regular_ll ... instant 0 36 19 \n", + "3 0 an 0 regular_ll ... instant 0 36 19 \n", + "4 0 an 0 regular_ll ... instant 0 36 19 \n", + "... ... ... ... ... ... ... ... .. .. \n", + "12611 0 es 0 regular_ll ... instant 0 18 9 \n", + "12612 0 es 0 regular_ll ... instant 0 18 9 \n", + "12613 0 es 0 regular_ll ... instant 0 18 9 \n", + "12614 0 es 0 regular_ll ... instant 0 18 9 \n", + "12615 3 es 0 regular_ll ... instant 3 18 9 \n", + "\n", + " validityDate validityTime md5GridSection \\\n", + "0 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "1 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "2 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "3 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "4 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "... ... ... ... 
\n", + "12611 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12612 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12613 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12614 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12615 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "\n", + " bitmapPresent gridSpec edition \n", + "0 0 None 1 \n", + "1 0 None 1 \n", + "2 0 None 1 \n", + "3 0 None 1 \n", + "4 0 None 1 \n", + "... ... ... ... \n", + "12611 0 None 1 \n", + "12612 0 None 1 \n", + "12613 0 None 1 \n", + "12614 0 None 1 \n", + "12615 0 None 1 \n", + "\n", + "[12616 rows x 21 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls = fl.ls(extra_keys=['stream', 'stepType', 'step', 'Ni', 'Nj', 'validityDate', 'validityTime', \n", + " 'gridType', 'md5GridSection', 'bitmapPresent', 'gridSpec', 'edition'])\n", + "fl_ls" + ] + }, + { + "cell_type": "markdown", + "id": "3588a1c7-6bda-42c1-b0d9-fce7b31c7afc", + "metadata": {}, + "source": [ + "Here we see that the md5GridSection key is not ideal for finding GRIB messages that have the same grid: the grid section is organised differently in GRIB editions 1 and 2 (in the table below, Ni=18 has a different md5GridSection value for each edition)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "633977b6-a858-4b8a-85ad-63463d985dc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
editionNimd5GridSectioncount
2112e09e4d6171c0ac85da1d256b2f8acf881840
01183d13e67882e20f1c127f846bdc4725645640
321882a7e502a7ebe916255822ef509349d824
113633c7d6025995e1b4913811e77d38ec505112
\n", + "
" + ], + "text/plain": [ + " edition Ni md5GridSection count\n", + "2 1 12 e09e4d6171c0ac85da1d256b2f8acf88 1840\n", + "0 1 18 3d13e67882e20f1c127f846bdc472564 5640\n", + "3 2 18 82a7e502a7ebe916255822ef509349d8 24\n", + "1 1 36 33c7d6025995e1b4913811e77d38ec50 5112" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls[['edition', 'Ni', 'md5GridSection']].value_counts().reset_index().sort_values('Ni')" + ] + }, + { + "cell_type": "markdown", + "id": "596fb8cd-b404-4484-8158-4e338b3bdfd3", + "metadata": {}, + "source": [ + "Some further metadata exploration" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bf2d985d-34b9-4c30-96e6-1cf1490bbebe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dataDatedataTimestepTypestepstepRangevalidityDatevalidityTimecount
020230101900instant0020230101900568
1202301012100instant00202301012100568
220230101300instant0020230101300568
3202301011800instant00202301011800568
420230101600instant0020230101600568
...........................
13320230101600max76-72023010113005
13420230101600max87-82023010114005
13520230101600max109-102023010116005
13620230101600max1110-112023010117005
13720230101600max10-1202301017005
\n", + "

138 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " dataDate dataTime stepType step stepRange validityDate validityTime \\\n", + "0 20230101 900 instant 0 0 20230101 900 \n", + "1 20230101 2100 instant 0 0 20230101 2100 \n", + "2 20230101 300 instant 0 0 20230101 300 \n", + "3 20230101 1800 instant 0 0 20230101 1800 \n", + "4 20230101 600 instant 0 0 20230101 600 \n", + ".. ... ... ... ... ... ... ... \n", + "133 20230101 600 max 7 6-7 20230101 1300 \n", + "134 20230101 600 max 8 7-8 20230101 1400 \n", + "135 20230101 600 max 10 9-10 20230101 1600 \n", + "136 20230101 600 max 11 10-11 20230101 1700 \n", + "137 20230101 600 max 1 0-1 20230101 700 \n", + "\n", + " count \n", + "0 568 \n", + "1 568 \n", + "2 568 \n", + "3 568 \n", + "4 568 \n", + ".. ... \n", + "133 5 \n", + "134 5 \n", + "135 5 \n", + "136 5 \n", + "137 5 \n", + "\n", + "[138 rows x 8 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls[['dataDate', 'dataTime', 'stepType', 'step', 'stepRange', 'validityDate', 'validityTime']].value_counts().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0b5eba4e-09e7-4624-91da-2ff21d2debed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "number\n", + "0 12616\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls['number'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "ad767d70-5603-4238-b36f-a1409cb3b4e4", + "metadata": {}, + "source": [ + "An example of conversion to NetCDF using splitting wrt several keys" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "43c05e33-be81-4b82-b855-83dfd3623e06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dss, split_coords_list = fl.to_xarray(\n", + " 
split_dims=['stream', 'dataType', 'edition', 'Ni'], \n", + " time_dim_mode='valid_time', \n", + " squeeze=False, \n", + ")\n", + "len(dss)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "11b5ede5-dcdf-413f-b37c-eaddbb0a5197", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'stream': 'enda', 'dataType': 'an', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'em', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'es', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'fc', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'fc', 'edition': 2, 'Ni': 18},\n", + " {'stream': 'ewda', 'dataType': 'an', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'ewda', 'dataType': 'em', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'ewda', 'dataType': 'es', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'oper', 'dataType': 'an', 'edition': 1, 'Ni': 36},\n", + " {'stream': 'oper', 'dataType': 'fc', 'edition': 1, 'Ni': 36},\n", + " {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "split_coords_list" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7747a1ff-5010-47be-a502-93796e460a90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 1MB\n",
+       "Dimensions:     (number: 1, valid_time: 8, levtype: 1, latitude: 9,\n",
+       "                 longitude: 18)\n",
+       "Coordinates:\n",
+       "  * number      (number) int64 8B 0\n",
+       "  * valid_time  (valid_time) datetime64[ns] 64B 2023-01-01 ... 2023-01-01T21:...\n",
+       "  * levtype     (levtype) <U3 12B 'sfc'\n",
+       "  * latitude    (latitude) float64 72B 80.0 60.0 40.0 20.0 ... -40.0 -60.0 -80.0\n",
+       "  * longitude   (longitude) float64 144B 0.0 20.0 40.0 ... 300.0 320.0 340.0\n",
+       "Data variables: (12/114)\n",
+       "    100u        (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    100v        (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    10u         (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    10v         (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    2d          (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    2t          (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    ...          ...\n",
+       "    vithee      (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    vithen      (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    vitoe       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    viwve       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    viwvn       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    z           (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "Attributes:\n",
+       "    class:        ea\n",
+       "    type:         an\n",
+       "    expver:       0001\n",
+       "    date:         20230101\n",
+       "    time:         0\n",
+       "    anoffset:     0\n",
+       "    domain:       g\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 1MB\n", + "Dimensions: (number: 1, valid_time: 8, levtype: 1, latitude: 9,\n", + " longitude: 18)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * valid_time (valid_time) datetime64[ns] 64B 2023-01-01 ... 2023-01-01T21:...\n", + " * levtype (levtype)