diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 6b8574d72..65614ad8c 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -163,6 +163,7 @@ Xarray engine xarray_engine_to_grib.ipynb xarray_engine_split.ipynb xarray_engine_seasonal.ipynb + xarray_engine_chunks.ipynb Targets and encoders +++++++++++++++++++++ diff --git a/docs/examples/xarray_engine_chunks.ipynb b/docs/examples/xarray_engine_chunks.ipynb new file mode 100644 index 000000000..39eb8f6f9 --- /dev/null +++ b/docs/examples/xarray_engine_chunks.ipynb @@ -0,0 +1,1245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f3568669-9884-491d-8597-5130ad273337", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Xarray engine: chunks" + ] + }, + { + "cell_type": "raw", + "id": "b42eccf8-abcc-44a1-8406-f8aa966b1bf5", + "metadata": { + "editable": true, + "raw_mimetype": "text/x-rst", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This notebook demonstrates how to use chunking in computations when a GRIB fieldlist is converted to Xarray with :py:meth:`~data.readers.grib.index.GribFieldList.to_xarray`. Chunking can be used to handle data that does not fit into memory." + ] + }, + { + "cell_type": "markdown", + "id": "8b1ceb8a-967d-4324-9af3-3b6eec468da1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "First, we get 2m temperature data for a whole year on a low-resolution regular latitude-longitude grid. It contains 2 fields per day (at 0 and 12 UTC). This data obviously fits into memory, so it is only used for demonstration purposes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a4f7dd0-f443-4cda-8725-cd61927d1409", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "98299fdfafa74aa5b8cbc0f95188b8d5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "t2_1_year_hourly.grib: 0%| | 0.00/429k [00:00, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "732" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data as ekd\n", + "ds_fl = ekd.from_source(\"sample\", \"t2_1_year_hourly.grib\")\n", + "len(ds_fl)" + ] + }, + { + "cell_type": "markdown", + "id": "e634b083-5491-4f12-83d0-3a1fb6c80e95", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Next, we convert the GRIB fieldlist to Xarray using a chunk size of 10 fields." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0c270a17-cc0e-4fb0-9042-e7c8e1dc0fb1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.DataArray '2t' (valid_time: 732, latitude: 13, longitude: 24)> Size: 2MB\n", + "dask.array<open_dataset-2t, shape=(732, 13, 24), dtype=float64, chunksize=(10, 13, 24), chunktype=numpy.ndarray>\n", + "Coordinates:\n", + " * valid_time (valid_time) datetime64[ns] 6kB 2020-01-01 ... 2020-12-31T06:...\n", + " * latitude (latitude) float64 104B 90.0 75.0 60.0 ... -60.0 -75.0 -90.0\n", + " * longitude (longitude) float64 192B 0.0 15.0 30.0 ... 315.0 330.0 345.0\n", + "Attributes:\n", + " standard_name: air_temperature\n", + " long_name: 2 metre temperature\n", + " units: K
<xarray.DataArray '2t' (latitude: 13, longitude: 24)> Size: 2kB\n", + "array([[259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273,\n", + " 259.17798273, 259.17798273, 259.17798273, 259.17798273],\n", + " [273.2611026 , 275.61228088, 275.48984236, 274.29307835,\n", + " 268.16812105, 267.89195131, 264.09208792, 262.4144496 ,\n", + " 262.67648853, 261.67375629, 261.81749775, 261.75990725,\n", + " 261.65672248, 261.12205718, 260.31177713, 259.69160124,\n", + " 259.44480308, 258.91397999, 256.69544345, 261.1351634 ,\n", + " 263.80255581, 245.3709899 , 246.22366237, 263.91035124],\n", + " [281.80054932, 277.3069957 , 278.84242945, 276.02408075,\n", + " 274.49351381, 274.21627678, 274.1331996 , 272.61215281,\n", + " 271.49176346, 269.63533416, 273.44181469, 275.6214595 ,\n", + " 276.75602234, 275.00730308, 276.87611285, 273.58944106,\n", + " 271.92337207, 268.99705718, 266.13354113, 265.23450595,\n", + " 271.60276073, 273.63473648, 279.19937105, 281.8119052 ],\n", + " [284.15830206, 283.85715793, 286.20103601, 283.92187788,\n", + " 283.76810397, 284.21051346, 282.39472624, 279.20961695,\n", + "...\n", + " 283.64451278, 283.35801176, 282.91684031, 282.9554759 ,\n", + " 282.14711695, 282.26140144, 281.0409011 , 280.42200595],\n", + " [269.24513607, 270.23454864, 271.63341305, 271.82437105,\n", + " 271.60942057, 270.65992432, 270.95410978, 271.93656367,\n", + " 273.56741237, 274.12688129, 273.18558177, 275.06365554,\n", + " 275.63789564, 274.6907901 , 272.28731504, 273.95432323,\n", + " 275.29179762, 275.50107016, 275.76251141, 276.15411831,\n", + " 273.34336866, 269.42683006, 268.963758 , 268.6785804 ],\n", + " [234.73941544, 228.72784611, 229.33594038, 225.86723307,\n", + " 238.04060226, 241.2718608 , 229.15774707, 
226.06100868,\n", + " 224.74338573, 228.59588744, 232.7254554 , 258.2240039 ,\n", + " 257.83465964, 258.4833106 , 262.08999605, 259.26575595,\n", + " 256.58991066, 260.79205376, 251.32129728, 250.12530172,\n", + " 253.13652952, 256.13648682, 258.81438129, 254.06318594],\n", + " [227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102,\n", + " 227.70048102, 227.70048102, 227.70048102, 227.70048102]])\n", + "Coordinates:\n", + " * latitude (latitude) float64 104B 90.0 75.0 60.0 45.0 ... -60.0 -75.0 -90.0\n", + " * longitude (longitude) float64 192B 0.0 15.0 30.0 45.0 ... 315.0 330.0 345.0