From 356f6087b0fd946474335eb62da0231f3ce323ce Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 29 Apr 2025 14:59:16 +0200 Subject: [PATCH 1/7] Fix to_xarray conversion to allow splitting on multiple keys; do not require the splitting to form a hypercube --- src/earthkit/data/utils/xarray/builder.py | 3 ++- src/earthkit/data/utils/xarray/splitter.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/earthkit/data/utils/xarray/builder.py b/src/earthkit/data/utils/xarray/builder.py index 7f844becb..81c38359a 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -622,7 +622,8 @@ def prepare(self, keys): # LOG.debug(f"split_dims={self.split_dims}") ds_xr = XArrayInputFieldList(self.ds, keys=self.profile.index_keys, remapping=remapping) - vals, _ = ds_xr.unique_values(*keys) + vals, _ = ds_xr.unique_values(keys) + LOG.debug(f"{keys=}, {vals=}") return ds_xr, vals diff --git a/src/earthkit/data/utils/xarray/splitter.py b/src/earthkit/data/utils/xarray/splitter.py index f7c4306d3..2bc9f8304 100644 --- a/src/earthkit/data/utils/xarray/splitter.py +++ b/src/earthkit/data/utils/xarray/splitter.py @@ -55,6 +55,8 @@ def split(self, builder): for x in product(*dims.values()): y = dict(zip(dims.keys(), x)) ds_sel = ds_xr.sel(**y) + if len(ds_sel) == 0: + continue ds_sort, profile = builder.parse(ds_sel, None) if len(ds_sort) == 0: raise ValueError(f"No field found for selection={y}") From 126dc57085c9ef6be32b76b057f5117bd88d2c9a Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 29 Apr 2025 15:44:50 +0200 Subject: [PATCH 2/7] to_xarray conversion returns a (list of) dataset(s) AND a (list of) dict(s) with splitting keys --- src/earthkit/data/utils/xarray/builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/earthkit/data/utils/xarray/builder.py b/src/earthkit/data/utils/xarray/builder.py index 81c38359a..b7ae38e75 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -632,8 +632,10 @@ def build(self): splitter = Splitter.make(self.split_dims) datasets = [] + split_coords_list = [] for ds, profile, split_coords in splitter.split(self): dims = profile.dims.to_list() + split_coords_list.append(dict(split_coords)) LOG.debug(f"splitting {dims=} type of s_ds={type(ds)} {split_coords=}") split_coords.pop(profile.variable_key, None) builder = self.builder(ds, profile, dims, grid=self.grid(ds), fixed_local_attrs=split_coords) @@ -646,7 +648,7 @@ def build(self): datasets.append(xarray.open_dataset(ds, **self.xr_open_dataset_kwargs)) ds._ek_builder = None - return datasets[0] if len(datasets) == 1 else datasets + return (datasets[0], split_coords_list[0]) if len(datasets) == 1 else (datasets, split_coords_list) def from_earthkit(ds, backend_kwargs=None, other_kwargs=None): From 4dddd1d1440d4e0ed911268b8dd318ae4d920fcc Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Tue, 29 Apr 2025 16:06:04 +0200 Subject: [PATCH 3/7] earthkit-data xarray engine on single level ERA5 data from CDS notebook added --- .../ekd_test_on_cds_era5_single_level.ipynb | 1383 +++++++++++++++++ 1 file changed, 1383 insertions(+) create mode 100644 docs/experimental/ekd_test_on_cds_era5_single_level.ipynb diff --git a/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb new file mode 100644 index 000000000..c27a7fc64 --- /dev/null +++ b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb @@ -0,0 +1,1383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d1a00c3-d3ae-48de-98ae-74e61f59c648", + "metadata": {}, + "source": [ + "# Testing earthkit-data xarray engine on single level ERA5 data from CDS" + ] + }, + { + "cell_type": "markdown", + "id": "199144cc-ac0a-423f-bd73-c7bff49a01f5", + "metadata": {}, + "source": [ + "To run this notebook, install this version of earthkit-data package:\n", + "https://github.com/ecmwf/earthkit-data/tree/126dc57085c9ef6be32b76b057f5117bd88d2c9a" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6e74f84a-6a55-4549-b80f-1ec65fd90aa7", + "metadata": {}, + "outputs": [], + "source": [ + "import earthkit.data as ekd" + ] + }, + { + "cell_type": "markdown", + "id": "abadacb8-0062-4dde-be20-d2cb52964d15", + "metadata": {}, + "source": [ + "Load a dataset containing 30k+ GRIB messages with single level ERA5 data from CDS (with degraded spatial resolution)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8584b68f-07f0-4ce0-918f-e3493a5825c2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " + ] + } + ], + "source": [ + "fl = ekd.from_source('url', 'https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/xr_engine/cds-reanalysis-era5-single-levels-20230101-low-resol.grib')" + ] + }, + { + "cell_type": "markdown", + "id": "1a615dcf-e31c-4770-b5ac-9606cf64938e", + "metadata": {}, + "source": [ + "Explore the content of the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f0d85e0b-3eb6-4c95-9114-ba78d4be9e37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'edition': (1, 2),\n", + " 'stream': ('oper', 'wave', 'ewda', 'enda'),\n", + " 'dataType': ('an', 'fc', 'em', 'es'),\n", + " 'stepType': ('instant', 'accum', 'max', 'avg'),\n", + " 'gridType': ('regular_ll',),\n", + " 'Ni': (36, 18, 12)}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl.unique_values('edition', 'stream', 'dataType', 'stepType', 'edition', 'gridType', 'Ni')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ebd189c1-897d-49e0-bcf7-247b644a4cf7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType...stepTypestepNiNjvalidityDatevalidityTimemd5GridSectionbitmapPresentgridSpecedition
0ecmf10usurface02023010100an0.0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
1ecmf10vsurface02023010100an0.0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
2ecmf2dsurface02023010100an0.0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
3ecmf2tsurface02023010100an0.0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
4ecmfmslsurface02023010100an0.0regular_ll...instant0361920230101033c7d6025995e1b4913811e77d38ec500None1
..................................................................
38219ecmfswvl1depthBelowLandLayer02023010121000es0.0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
38220ecmfswvl2depthBelowLandLayer72023010121000es0.0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
38221ecmfswvl3depthBelowLandLayer282023010121000es0.0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
38222ecmfswvl4depthBelowLandLayer1002023010121000es0.0regular_ll...instant01892023010121003d13e67882e20f1c127f846bdc4725640None1
38223ecmfdeg0lsurface02023010118003es0.0regular_ll...instant31892023010121003d13e67882e20f1c127f846bdc4725640None1
\n", + "

38224 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime \\\n", + "0 ecmf 10u surface 0 20230101 0 \n", + "1 ecmf 10v surface 0 20230101 0 \n", + "2 ecmf 2d surface 0 20230101 0 \n", + "3 ecmf 2t surface 0 20230101 0 \n", + "4 ecmf msl surface 0 20230101 0 \n", + "... ... ... ... ... ... ... \n", + "38219 ecmf swvl1 depthBelowLandLayer 0 20230101 2100 \n", + "38220 ecmf swvl2 depthBelowLandLayer 7 20230101 2100 \n", + "38221 ecmf swvl3 depthBelowLandLayer 28 20230101 2100 \n", + "38222 ecmf swvl4 depthBelowLandLayer 100 20230101 2100 \n", + "38223 ecmf deg0l surface 0 20230101 1800 \n", + "\n", + " stepRange dataType number gridType ... stepType step Ni Nj \\\n", + "0 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "1 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "2 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "3 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "4 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "... ... ... ... ... ... ... ... .. .. \n", + "38219 0 es 0.0 regular_ll ... instant 0 18 9 \n", + "38220 0 es 0.0 regular_ll ... instant 0 18 9 \n", + "38221 0 es 0.0 regular_ll ... instant 0 18 9 \n", + "38222 0 es 0.0 regular_ll ... instant 0 18 9 \n", + "38223 3 es 0.0 regular_ll ... instant 3 18 9 \n", + "\n", + " validityDate validityTime md5GridSection \\\n", + "0 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "1 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "2 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "3 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "4 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", + "... ... ... ... \n", + "38219 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "38220 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "38221 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "38222 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "38223 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "\n", + " bitmapPresent gridSpec edition \n", + "0 0 None 1 \n", + "1 0 None 1 \n", + "2 0 None 1 \n", + "3 0 None 1 \n", + "4 0 None 1 \n", + "... ... ... ... \n", + "38219 0 None 1 \n", + "38220 0 None 1 \n", + "38221 0 None 1 \n", + "38222 0 None 1 \n", + "38223 0 None 1 \n", + "\n", + "[38224 rows x 21 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls = fl.ls(extra_keys=['stream', 'stepType', 'step', 'Ni', 'Nj', 'validityDate', 'validityTime', \n", + " 'gridType', 'md5GridSection', 'bitmapPresent', 'gridSpec', 'edition'])\n", + "fl_ls" + ] + }, + { + "cell_type": "markdown", + "id": "3588a1c7-6bda-42c1-b0d9-fce7b31c7afc", + "metadata": {}, + "source": [ + "Here we see that md5GridSection key is not ideal to look for GRIB messages having the same grid: grid section is organised differently in GRIB edition 1 and 2" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "633977b6-a858-4b8a-85ad-63463d985dc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
editionNimd5GridSectioncount
1112e09e4d6171c0ac85da1d256b2f8acf8811776
01183d13e67882e20f1c127f846bdc47256420976
321882a7e502a7ebe916255822ef509349d8288
213633c7d6025995e1b4913811e77d38ec505112
4236d655110b180fea25dc13aef29342304172
\n", + "
" + ], + "text/plain": [ + " edition Ni md5GridSection count\n", + "1 1 12 e09e4d6171c0ac85da1d256b2f8acf88 11776\n", + "0 1 18 3d13e67882e20f1c127f846bdc472564 20976\n", + "3 2 18 82a7e502a7ebe916255822ef509349d8 288\n", + "2 1 36 33c7d6025995e1b4913811e77d38ec50 5112\n", + "4 2 36 d655110b180fea25dc13aef293423041 72" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls[['edition', 'Ni', 'md5GridSection']].value_counts().reset_index().sort_values('Ni')" + ] + }, + { + "cell_type": "markdown", + "id": "596fb8cd-b404-4484-8158-4e338b3bdfd3", + "metadata": {}, + "source": [ + "Some further metadata exploration" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "bf2d985d-34b9-4c30-96e6-1cf1490bbebe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dataDatedataTimestepTypestepstepRangevalidityDatevalidityTimecount
020230101900instant00202301019002008
1202301012100instant002023010121002008
220230101300instant00202301013002008
3202301011800instant002023010118002008
420230101600instant00202301016002008
...........................
13320230101600max87-82023010114005
13420230101600max109-102023010116005
13520230101600max1110-112023010117005
13620230101600max1211-122023010118005
13720230101600max10-1202301017005
\n", + "

138 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " dataDate dataTime stepType step stepRange validityDate validityTime \\\n", + "0 20230101 900 instant 0 0 20230101 900 \n", + "1 20230101 2100 instant 0 0 20230101 2100 \n", + "2 20230101 300 instant 0 0 20230101 300 \n", + "3 20230101 1800 instant 0 0 20230101 1800 \n", + "4 20230101 600 instant 0 0 20230101 600 \n", + ".. ... ... ... ... ... ... ... \n", + "133 20230101 600 max 8 7-8 20230101 1400 \n", + "134 20230101 600 max 10 9-10 20230101 1600 \n", + "135 20230101 600 max 11 10-11 20230101 1700 \n", + "136 20230101 600 max 12 11-12 20230101 1800 \n", + "137 20230101 600 max 1 0-1 20230101 700 \n", + "\n", + " count \n", + "0 2008 \n", + "1 2008 \n", + "2 2008 \n", + "3 2008 \n", + "4 2008 \n", + ".. ... \n", + "133 5 \n", + "134 5 \n", + "135 5 \n", + "136 5 \n", + "137 5 \n", + "\n", + "[138 rows x 8 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls[['dataDate', 'dataTime', 'stepType', 'step', 'stepRange', 'validityDate', 'validityTime']].value_counts().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0b5eba4e-09e7-4624-91da-2ff21d2debed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "number\n", + "0.0 12616\n", + "1.0 2832\n", + "2.0 2832\n", + "3.0 2832\n", + "4.0 2832\n", + "5.0 2832\n", + "6.0 2832\n", + "7.0 2832\n", + "8.0 2832\n", + "9.0 2832\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fl_ls['number'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "ad767d70-5603-4238-b36f-a1409cb3b4e4", + "metadata": {}, + "source": [ + "An example of conversion to NetCDF using splitting wrt several keys (for the moment we consider the ensemble members with number=0 only)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "43c05e33-be81-4b82-b855-83dfd3623e06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dss, split_coords_list = fl.sel(number=0).to_xarray(\n", + " split_dims=['stream', 'dataType', 'edition', 'Ni'], \n", + " time_dim_mode='valid_time', \n", + " squeeze=False, \n", + ")\n", + "len(dss)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "11b5ede5-dcdf-413f-b37c-eaddbb0a5197", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'stream': 'enda', 'dataType': 'an', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'em', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'es', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'fc', 'edition': 1, 'Ni': 18},\n", + " {'stream': 'enda', 'dataType': 'fc', 'edition': 2, 'Ni': 18},\n", + " {'stream': 'ewda', 'dataType': 'an', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'ewda', 'dataType': 'em', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'ewda', 'dataType': 'es', 'edition': 1, 'Ni': 12},\n", + " {'stream': 'oper', 'dataType': 'an', 'edition': 1, 'Ni': 36},\n", + " {'stream': 'oper', 'dataType': 'fc', 'edition': 1, 'Ni': 36},\n", + " {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "split_coords_list" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "7747a1ff-5010-47be-a502-93796e460a90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 1MB\n",
+       "Dimensions:     (number: 1, valid_time: 8, levtype: 1, latitude: 9,\n",
+       "                 longitude: 18)\n",
+       "Coordinates:\n",
+       "  * number      (number) int64 8B 0\n",
+       "  * valid_time  (valid_time) datetime64[ns] 64B 2023-01-01 ... 2023-01-01T21:...\n",
+       "  * levtype     (levtype) <U3 12B 'sfc'\n",
+       "  * latitude    (latitude) float64 72B 80.0 60.0 40.0 20.0 ... -40.0 -60.0 -80.0\n",
+       "  * longitude   (longitude) float64 144B 0.0 20.0 40.0 ... 300.0 320.0 340.0\n",
+       "Data variables: (12/114)\n",
+       "    100u        (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    100v        (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    10u         (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    10v         (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    2d          (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    2t          (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    ...          ...\n",
+       "    vithee      (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    vithen      (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    vitoe       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    viwve       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    viwvn       (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "    z           (number, valid_time, levtype, latitude, longitude) float64 10kB ...\n",
+       "Attributes:\n",
+       "    class:        ea\n",
+       "    type:         an\n",
+       "    expver:       0001\n",
+       "    date:         20230101\n",
+       "    time:         0\n",
+       "    anoffset:     0\n",
+       "    domain:       g\n",
+       "    Conventions:  CF-1.8\n",
+       "    institution:  ECMWF
" + ], + "text/plain": [ + " Size: 1MB\n", + "Dimensions: (number: 1, valid_time: 8, levtype: 1, latitude: 9,\n", + " longitude: 18)\n", + "Coordinates:\n", + " * number (number) int64 8B 0\n", + " * valid_time (valid_time) datetime64[ns] 64B 2023-01-01 ... 2023-01-01T21:...\n", + " * levtype (levtype) Date: Fri, 2 May 2025 17:10:17 +0200 Subject: [PATCH 4/7] to_xarray conversion with split_dims always returns a list of datasets and a list of dicts with splitting keys, even if their length is 1; test added --- src/earthkit/data/utils/xarray/builder.py | 2 +- tests/xr_engine/test_xr_split.py | 42 +++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/earthkit/data/utils/xarray/builder.py b/src/earthkit/data/utils/xarray/builder.py index b7ae38e75..620d1d105 100644 --- a/src/earthkit/data/utils/xarray/builder.py +++ b/src/earthkit/data/utils/xarray/builder.py @@ -648,7 +648,7 @@ def build(self): datasets.append(xarray.open_dataset(ds, **self.xr_open_dataset_kwargs)) ds._ek_builder = None - return (datasets[0], split_coords_list[0]) if len(datasets) == 1 else (datasets, split_coords_list) + return datasets, split_coords_list def from_earthkit(ds, backend_kwargs=None, other_kwargs=None): diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index 2c747af4f..0e90ba5f2 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -18,31 +18,59 @@ @pytest.mark.cache @pytest.mark.parametrize( - "kwargs,num,variables,dim_keys,split_values", + "url_suffix,kwargs,num,variables,dim_keys,split_values", [ ( + ["level", "pl.grib"], {"time_dim_mode": "raw", "split_dims": ["step"]}, 2, ["2t", "msl", "r", "t"], ["date", "time", "levelist"], - {"step": [0, 6]}, + [{"step": 0}, {"step": 6}], ), ( + ["level", "pl.grib"], {"time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step"}, 2, ["2t", "msl", "r", "t"], ["date", "time", "step", "levelist"], - {"step": [0, 6]}, + [{"step": 0}, {"step": 6}], + ), + ( + ["cds-reanalysis-era5-single-levels-20230101-low-resol.grib"], + {"time_dim_mode": "valid_time", "split_dims": ["stream", "dataType", "edition", "Ni"]}, + 11, + None, + ["valid_time"], + [ + {'stream': 'enda', 'dataType': 'an', 'edition': 1, 'Ni': 18}, + {'stream': 'enda', 'dataType': 'em', 'edition': 1, 'Ni': 18}, + {'stream': 'enda', 'dataType': 'es', 'edition': 1, 'Ni': 18}, + {'stream': 'enda', 'dataType': 'fc', 'edition': 1, 'Ni': 18}, + {'stream': 'enda', 'dataType': 'fc', 'edition': 2, 'Ni': 18}, + {'stream': 'ewda', 'dataType': 'an', 'edition': 1, 'Ni': 12}, + {'stream': 'ewda', 'dataType': 'em', 'edition': 1, 'Ni': 12}, + {'stream': 'ewda', 'dataType': 'es', 'edition': 1, 'Ni': 12}, + {'stream': 'oper', 'dataType': 'an', 'edition': 1, 'Ni': 36}, + {'stream': 'oper', 'dataType': 'fc', 'edition': 1, 'Ni': 36}, + {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}, + ], ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist"]), # ({"squeeze": False}, "param", ["r", "t"], ["time", "step", "levelist"]), ], ) -def test_xr_split(kwargs, num, variables, dim_keys, split_values): - ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", "level", "pl.grib")) +def test_xr_split(url_suffix, kwargs, num, variables, dim_keys, split_values): + ds_ek = from_source("url", earthkit_remote_test_data_file("test-data", "xr_engine", *url_suffix)) dim_keys = dim_keys + ["latitude", "longitude"] - ds_lst = ds_ek.to_xarray(**kwargs) + ds_lst, split_coords_lst = ds_ek.to_xarray(**kwargs) assert len(ds_lst) == num + assert len(split_coords_lst) == len(split_values) + def dict_to_frozenset_of_kvpairs(d): + return frozenset(d.items()) + _split_coords_lst = frozenset(map(dict_to_frozenset_of_kvpairs, split_coords_lst)) + _split_values = frozenset(map(dict_to_frozenset_of_kvpairs, split_values)) + assert _split_coords_lst == _split_values for ds in ds_lst: - assert list(ds.dims.keys()) == dim_keys + assert sorted(ds.dims) == sorted(dim_keys) From 8506027eae050e74bf520a3bf1bbcc0e7d10da78 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 2 May 2025 17:27:10 +0200 Subject: [PATCH 5/7] trailing spaces fixed --- tests/xr_engine/test_xr_split.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index 0e90ba5f2..fc47ce41f 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -21,7 +21,7 @@ "url_suffix,kwargs,num,variables,dim_keys,split_values", [ ( - ["level", "pl.grib"], + ["level", "pl.grib"], {"time_dim_mode": "raw", "split_dims": ["step"]}, 2, ["2t", "msl", "r", "t"], @@ -29,7 +29,7 @@ [{"step": 0}, {"step": 6}], ), ( - ["level", "pl.grib"], + ["level", "pl.grib"], {"time_dim_mode": "raw", "split_dims": ["step"], "ensure_dims": "step"}, 2, ["2t", "msl", "r", "t"], @@ -37,7 +37,7 @@ [{"step": 0}, {"step": 6}], ), ( - ["cds-reanalysis-era5-single-levels-20230101-low-resol.grib"], + ["cds-reanalysis-era5-single-levels-20230101-low-resol.grib"], {"time_dim_mode": "valid_time", "split_dims": ["stream", "dataType", "edition", "Ni"]}, 11, None, @@ -53,7 +53,7 @@ {'stream': 'ewda', 'dataType': 'es', 'edition': 1, 'Ni': 12}, {'stream': 'oper', 'dataType': 'an', 'edition': 1, 'Ni': 36}, {'stream': 'oper', 'dataType': 'fc', 'edition': 1, 'Ni': 36}, - {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}, + {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}, ], ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist"]), From 19c9bc044a49bef982f3086450984276e36c8944 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Fri, 2 May 2025 18:15:01 +0200 Subject: [PATCH 6/7] formatting issue fixed --- tests/xr_engine/test_xr_split.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/xr_engine/test_xr_split.py b/tests/xr_engine/test_xr_split.py index fc47ce41f..abd42a4c0 100644 --- a/tests/xr_engine/test_xr_split.py +++ b/tests/xr_engine/test_xr_split.py @@ -43,17 +43,17 @@ None, ["valid_time"], [ - {'stream': 'enda', 'dataType': 'an', 'edition': 1, 'Ni': 18}, - {'stream': 'enda', 'dataType': 'em', 'edition': 1, 'Ni': 18}, - {'stream': 'enda', 'dataType': 'es', 'edition': 1, 'Ni': 18}, - {'stream': 'enda', 'dataType': 'fc', 'edition': 1, 'Ni': 18}, - {'stream': 'enda', 'dataType': 'fc', 'edition': 2, 'Ni': 18}, - {'stream': 'ewda', 'dataType': 'an', 'edition': 1, 'Ni': 12}, - {'stream': 'ewda', 'dataType': 'em', 'edition': 1, 'Ni': 12}, - {'stream': 'ewda', 'dataType': 'es', 'edition': 1, 'Ni': 12}, - {'stream': 'oper', 'dataType': 'an', 'edition': 1, 'Ni': 36}, - {'stream': 'oper', 'dataType': 'fc', 'edition': 1, 'Ni': 36}, - {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}, + {"stream": "enda", "dataType": "an", "edition": 1, "Ni": 18}, + {"stream": "enda", "dataType": "em", "edition": 1, "Ni": 18}, + {"stream": "enda", "dataType": "es", "edition": 1, "Ni": 18}, + {"stream": "enda", "dataType": "fc", "edition": 1, "Ni": 18}, + {"stream": "enda", "dataType": "fc", "edition": 2, "Ni": 18}, + {"stream": "ewda", "dataType": "an", "edition": 1, "Ni": 12}, + {"stream": "ewda", "dataType": "em", "edition": 1, "Ni": 12}, + {"stream": "ewda", "dataType": "es", "edition": 1, "Ni": 12}, + {"stream": "oper", "dataType": "an", "edition": 1, "Ni": 36}, + {"stream": "oper", "dataType": "fc", "edition": 1, "Ni": 36}, + {"stream": "wave", "dataType": "an", "edition": 1, "Ni": 18}, ], ), # ({"base_datetime_dim": True}, "param", ["r", "t"], ["levelist"]), @@ -67,8 +67,10 @@ def test_xr_split(url_suffix, kwargs, num, variables, dim_keys, split_values): ds_lst, split_coords_lst = ds_ek.to_xarray(**kwargs) assert len(ds_lst) == num assert len(split_coords_lst) == len(split_values) + def dict_to_frozenset_of_kvpairs(d): return frozenset(d.items()) + _split_coords_lst = frozenset(map(dict_to_frozenset_of_kvpairs, split_coords_lst)) _split_values = frozenset(map(dict_to_frozenset_of_kvpairs, split_values)) assert _split_coords_lst == _split_values From 20b617140f523e73099d67eead49ca2eb982c6c6 Mon Sep 17 00:00:00 2001 From: Pawel Wolff Date: Mon, 5 May 2025 14:43:23 +0200 Subject: [PATCH 7/7] a demo notebook for PR #688 updated --- .../ekd_test_on_cds_era5_single_level.ipynb | 221 ++++++++---------- 1 file changed, 102 insertions(+), 119 deletions(-) diff --git a/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb index c27a7fc64..716627916 100644 --- a/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb +++ b/docs/experimental/ekd_test_on_cds_era5_single_level.ipynb @@ -14,7 +14,7 @@ "metadata": {}, "source": [ "To run this notebook, install this version of earthkit-data package:\n", - "https://github.com/ecmwf/earthkit-data/tree/126dc57085c9ef6be32b76b057f5117bd88d2c9a" + "https://github.com/ecmwf/earthkit-data/tree/feature/improve-xr-splitter" ] }, { @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "id": "f0d85e0b-3eb6-4c95-9114-ba78d4be9e37", "metadata": {}, "outputs": [ @@ -78,7 +78,7 @@ " 'Ni': (36, 18, 12)}" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "id": "ebd189c1-897d-49e0-bcf7-247b644a4cf7", "metadata": {}, "outputs": [ @@ -148,7 +148,7 @@ " 0\n", " 0\n", " an\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -172,7 +172,7 @@ " 0\n", " 0\n", " an\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -196,7 +196,7 @@ " 0\n", " 0\n", " an\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -220,7 +220,7 @@ " 0\n", " 0\n", " an\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -244,7 +244,7 @@ " 0\n", " 0\n", " an\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -283,7 +283,7 @@ " ...\n", " \n", " \n", - " 38219\n", + " 12611\n", " ecmf\n", " swvl1\n", " depthBelowLandLayer\n", @@ -292,7 +292,7 @@ " 2100\n", " 0\n", " es\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -307,7 +307,7 @@ " 1\n", " \n", " \n", - " 38220\n", + " 12612\n", " ecmf\n", " swvl2\n", " depthBelowLandLayer\n", @@ -316,7 +316,7 @@ " 2100\n", " 0\n", " es\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -331,7 +331,7 @@ " 1\n", " \n", " \n", - " 38221\n", + " 12613\n", " ecmf\n", " swvl3\n", " depthBelowLandLayer\n", @@ -340,7 +340,7 @@ " 2100\n", " 0\n", " es\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -355,7 +355,7 @@ " 1\n", " \n", " \n", - " 38222\n", + " 12614\n", " ecmf\n", " swvl4\n", " depthBelowLandLayer\n", @@ -364,7 +364,7 @@ " 2100\n", " 0\n", " es\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -379,7 +379,7 @@ " 1\n", " \n", " \n", - " 38223\n", + " 12615\n", " ecmf\n", " deg0l\n", " surface\n", @@ -388,7 +388,7 @@ " 1800\n", " 3\n", " es\n", - " 0.0\n", + " 0\n", " regular_ll\n", " ...\n", " instant\n", @@ -404,7 +404,7 @@ " \n", " \n", "\n", - "

38224 rows × 21 columns

\n", + "

12616 rows × 21 columns

\n", "" ], "text/plain": [ @@ -415,24 +415,24 @@ "3 ecmf 2t surface 0 20230101 0 \n", "4 ecmf msl surface 0 20230101 0 \n", "... ... ... ... ... ... ... \n", - "38219 ecmf swvl1 depthBelowLandLayer 0 20230101 2100 \n", - "38220 ecmf swvl2 depthBelowLandLayer 7 20230101 2100 \n", - "38221 ecmf swvl3 depthBelowLandLayer 28 20230101 2100 \n", - "38222 ecmf swvl4 depthBelowLandLayer 100 20230101 2100 \n", - "38223 ecmf deg0l surface 0 20230101 1800 \n", + "12611 ecmf swvl1 depthBelowLandLayer 0 20230101 2100 \n", + "12612 ecmf swvl2 depthBelowLandLayer 7 20230101 2100 \n", + "12613 ecmf swvl3 depthBelowLandLayer 28 20230101 2100 \n", + "12614 ecmf swvl4 depthBelowLandLayer 100 20230101 2100 \n", + "12615 ecmf deg0l surface 0 20230101 1800 \n", "\n", " stepRange dataType number gridType ... stepType step Ni Nj \\\n", - "0 0 an 0.0 regular_ll ... instant 0 36 19 \n", - "1 0 an 0.0 regular_ll ... instant 0 36 19 \n", - "2 0 an 0.0 regular_ll ... instant 0 36 19 \n", - "3 0 an 0.0 regular_ll ... instant 0 36 19 \n", - "4 0 an 0.0 regular_ll ... instant 0 36 19 \n", + "0 0 an 0 regular_ll ... instant 0 36 19 \n", + "1 0 an 0 regular_ll ... instant 0 36 19 \n", + "2 0 an 0 regular_ll ... instant 0 36 19 \n", + "3 0 an 0 regular_ll ... instant 0 36 19 \n", + "4 0 an 0 regular_ll ... instant 0 36 19 \n", "... ... ... ... ... ... ... ... .. .. \n", - "38219 0 es 0.0 regular_ll ... instant 0 18 9 \n", - "38220 0 es 0.0 regular_ll ... instant 0 18 9 \n", - "38221 0 es 0.0 regular_ll ... instant 0 18 9 \n", - "38222 0 es 0.0 regular_ll ... instant 0 18 9 \n", - "38223 3 es 0.0 regular_ll ... instant 3 18 9 \n", + "12611 0 es 0 regular_ll ... instant 0 18 9 \n", + "12612 0 es 0 regular_ll ... instant 0 18 9 \n", + "12613 0 es 0 regular_ll ... instant 0 18 9 \n", + "12614 0 es 0 regular_ll ... instant 0 18 9 \n", + "12615 3 es 0 regular_ll ... instant 3 18 9 \n", "\n", " validityDate validityTime md5GridSection \\\n", "0 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", @@ -441,11 +441,11 @@ "3 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", "4 20230101 0 33c7d6025995e1b4913811e77d38ec50 \n", "... ... ... ... \n", - "38219 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", - "38220 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", - "38221 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", - "38222 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", - "38223 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12611 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12612 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12613 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12614 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", + "12615 20230101 2100 3d13e67882e20f1c127f846bdc472564 \n", "\n", " bitmapPresent gridSpec edition \n", "0 0 None 1 \n", @@ -454,16 +454,16 @@ "3 0 None 1 \n", "4 0 None 1 \n", "... ... ... ... \n", - "38219 0 None 1 \n", - "38220 0 None 1 \n", - "38221 0 None 1 \n", - "38222 0 None 1 \n", - "38223 0 None 1 \n", + "12611 0 None 1 \n", + "12612 0 None 1 \n", + "12613 0 None 1 \n", + "12614 0 None 1 \n", + "12615 0 None 1 \n", "\n", - "[38224 rows x 21 columns]" + "[12616 rows x 21 columns]" ] }, - "execution_count": 12, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -484,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "id": "633977b6-a858-4b8a-85ad-63463d985dc7", "metadata": {}, "outputs": [ @@ -517,54 +517,46 @@ " \n", " \n", " \n", - " 1\n", + " 2\n", " 1\n", " 12\n", " e09e4d6171c0ac85da1d256b2f8acf88\n", - " 11776\n", + " 1840\n", " \n", " \n", " 0\n", " 1\n", " 18\n", " 3d13e67882e20f1c127f846bdc472564\n", - " 20976\n", + " 5640\n", " \n", " \n", " 3\n", " 2\n", " 18\n", " 82a7e502a7ebe916255822ef509349d8\n", - " 288\n", + " 24\n", " \n", " \n", - " 2\n", + " 1\n", " 1\n", " 36\n", " 33c7d6025995e1b4913811e77d38ec50\n", " 5112\n", " \n", - " \n", - " 4\n", - " 2\n", - " 36\n", - " d655110b180fea25dc13aef293423041\n", - " 72\n", - " \n", " \n", "\n", "" ], "text/plain": [ " edition Ni md5GridSection count\n", - "1 1 12 e09e4d6171c0ac85da1d256b2f8acf88 11776\n", - "0 1 18 3d13e67882e20f1c127f846bdc472564 20976\n", - "3 2 18 82a7e502a7ebe916255822ef509349d8 288\n", - "2 1 36 33c7d6025995e1b4913811e77d38ec50 5112\n", - "4 2 36 d655110b180fea25dc13aef293423041 72" + "2 1 12 e09e4d6171c0ac85da1d256b2f8acf88 1840\n", + "0 1 18 3d13e67882e20f1c127f846bdc472564 5640\n", + "3 2 18 82a7e502a7ebe916255822ef509349d8 24\n", + "1 1 36 33c7d6025995e1b4913811e77d38ec50 5112" ] }, - "execution_count": 14, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -583,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "id": "bf2d985d-34b9-4c30-96e6-1cf1490bbebe", "metadata": {}, "outputs": [ @@ -628,7 +620,7 @@ " 0\n", " 20230101\n", " 900\n", - " 2008\n", + " 568\n", " \n", " \n", " 1\n", @@ -639,7 +631,7 @@ " 0\n", " 20230101\n", " 2100\n", - " 2008\n", + " 568\n", " \n", " \n", " 2\n", @@ -650,7 +642,7 @@ " 0\n", " 20230101\n", " 300\n", - " 2008\n", + " 568\n", " \n", " \n", " 3\n", @@ -661,7 +653,7 @@ " 0\n", " 20230101\n", " 1800\n", - " 2008\n", + " 568\n", " \n", " \n", " 4\n", @@ -672,7 +664,7 @@ " 0\n", " 20230101\n", " 600\n", - " 2008\n", + " 568\n", " \n", " \n", " ...\n", @@ -690,10 +682,10 @@ " 20230101\n", " 600\n", " max\n", - " 8\n", - " 7-8\n", + " 7\n", + " 6-7\n", " 20230101\n", - " 1400\n", + " 1300\n", " 5\n", " \n", " \n", @@ -701,10 +693,10 @@ " 20230101\n", " 600\n", " max\n", - " 10\n", - " 9-10\n", + " 8\n", + " 7-8\n", " 20230101\n", - " 1600\n", + " 1400\n", " 5\n", " \n", " \n", @@ -712,10 +704,10 @@ " 20230101\n", " 600\n", " max\n", - " 11\n", - " 10-11\n", + " 10\n", + " 9-10\n", " 20230101\n", - " 1700\n", + " 1600\n", " 5\n", " \n", " \n", @@ -723,10 +715,10 @@ " 20230101\n", " 600\n", " max\n", - " 12\n", - " 11-12\n", + " 11\n", + " 10-11\n", " 20230101\n", - " 1800\n", + " 1700\n", " 5\n", " \n", " \n", @@ -753,18 +745,18 @@ "3 20230101 1800 instant 0 0 20230101 1800 \n", "4 20230101 600 instant 0 0 20230101 600 \n", ".. ... ... ... ... ... ... ... \n", - "133 20230101 600 max 8 7-8 20230101 1400 \n", - "134 20230101 600 max 10 9-10 20230101 1600 \n", - "135 20230101 600 max 11 10-11 20230101 1700 \n", - "136 20230101 600 max 12 11-12 20230101 1800 \n", + "133 20230101 600 max 7 6-7 20230101 1300 \n", + "134 20230101 600 max 8 7-8 20230101 1400 \n", + "135 20230101 600 max 10 9-10 20230101 1600 \n", + "136 20230101 600 max 11 10-11 20230101 1700 \n", "137 20230101 600 max 1 0-1 20230101 700 \n", "\n", " count \n", - "0 2008 \n", - "1 2008 \n", - "2 2008 \n", - "3 2008 \n", - "4 2008 \n", + "0 568 \n", + "1 568 \n", + "2 568 \n", + "3 568 \n", + "4 568 \n", ".. ... \n", "133 5 \n", "134 5 \n", @@ -775,7 +767,7 @@ "[138 rows x 8 columns]" ] }, - "execution_count": 16, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -786,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "id": "0b5eba4e-09e7-4624-91da-2ff21d2debed", "metadata": {}, "outputs": [ @@ -794,20 +786,11 @@ "data": { "text/plain": [ "number\n", - "0.0 12616\n", - "1.0 2832\n", - "2.0 2832\n", - "3.0 2832\n", - "4.0 2832\n", - "5.0 2832\n", - "6.0 2832\n", - "7.0 2832\n", - "8.0 2832\n", - "9.0 2832\n", + "0 12616\n", "Name: count, dtype: int64" ] }, - "execution_count": 18, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -821,12 +804,12 @@ "id": "ad767d70-5603-4238-b36f-a1409cb3b4e4", "metadata": {}, "source": [ - "An example of conversion to NetCDF using splitting wrt several keys (for the moment we consider the ensemble members with number=0 only)" + "An example of conversion to NetCDF using splitting wrt several keys" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "id": "43c05e33-be81-4b82-b855-83dfd3623e06", "metadata": {}, "outputs": [ @@ -836,13 +819,13 @@ "11" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dss, split_coords_list = fl.sel(number=0).to_xarray(\n", + "dss, split_coords_list = fl.to_xarray(\n", " split_dims=['stream', 'dataType', 'edition', 'Ni'], \n", " time_dim_mode='valid_time', \n", " squeeze=False, \n", @@ -852,7 +835,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 9, "id": "11b5ede5-dcdf-413f-b37c-eaddbb0a5197", "metadata": {}, "outputs": [ @@ -872,7 +855,7 @@ " {'stream': 'wave', 'dataType': 'an', 'edition': 1, 'Ni': 18}]" ] }, - "execution_count": 20, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -883,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 10, "id": "7747a1ff-5010-47be-a502-93796e460a90", "metadata": {}, "outputs": [ @@ -1292,18 +1275,18 @@ " anoffset: 0\n", " domain: g\n", " Conventions: CF-1.8\n", - " institution: ECMWF
  • class :
    ea
    type :
    an
    expver :
    0001
    date :
    20230101
    time :
    0
    anoffset :
    0
    domain :
    g
    Conventions :
    CF-1.8
    institution :
    ECMWF
  • " ], "text/plain": [ " Size: 1MB\n", @@ -1341,7 +1324,7 @@ " institution: ECMWF" ] }, - "execution_count": 21, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }