From 4c36fe5663aeb3fc19e6504d9f8190d7622321a9 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 9 Oct 2025 11:05:56 +0200 Subject: [PATCH 1/2] return indexes if option --- .../datacube/backends/datacube.py | 20 +++- polytope_feature/datacube/backends/fdb.py | 10 +- polytope_feature/datacube/backends/xarray.py | 3 +- polytope_feature/options.py | 14 ++- polytope_feature/polytope.py | 2 + tests/test_ecmwf_oper_data_fdb.py | 3 + tests/test_fdb_return_idx.py | 96 +++++++++++++++++++ 7 files changed, 142 insertions(+), 6 deletions(-) create mode 100644 tests/test_fdb_return_idx.py diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index 6c98f6495..8ddfa15fb 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -13,7 +13,14 @@ class Datacube(ABC): - def __init__(self, axis_options=None, compressed_axes_options=[], grid_online_path="", grid_local_directory=""): + def __init__( + self, + axis_options=None, + compressed_axes_options=[], + grid_online_path="", + grid_local_directory="", + return_indexes=False, + ): if axis_options is None: self.axis_options = {} else: @@ -34,6 +41,7 @@ def __init__(self, axis_options=None, compressed_axes_options=[], grid_online_pa self.grid_md5_hash = None self.grid_online_path = grid_online_path self.grid_local_directory = grid_local_directory + self.return_indexes = return_indexes @abstractmethod def get(self, requests: TensorIndexTree, context: Dict) -> Any: @@ -163,6 +171,7 @@ def create( alternative_axes=[], grid_online_path="", grid_local_directory="", + return_indexes=False, context=None, ): # TODO: get the configs as None for pre-determined value and change them to empty dictionary inside the function @@ -170,7 +179,13 @@ def create( from .xarray import XArrayDatacube xadatacube = XArrayDatacube( - datacube, axis_options, compressed_axes_options, context, grid_online_path, grid_local_directory + datacube, + 
axis_options, + compressed_axes_options, + context, + grid_online_path, + grid_local_directory, + return_indexes, ) return xadatacube if type(datacube).__name__ == "GribJump": @@ -185,6 +200,7 @@ def create( context, grid_online_path, grid_local_directory, + return_indexes, ) return fdbdatacube if type(datacube).__name__ == "MockDatacube": diff --git a/polytope_feature/datacube/backends/fdb.py b/polytope_feature/datacube/backends/fdb.py index 208c21cfe..3f866a328 100644 --- a/polytope_feature/datacube/backends/fdb.py +++ b/polytope_feature/datacube/backends/fdb.py @@ -19,13 +19,14 @@ def __init__( context=None, grid_online_path="", grid_local_directory="", + return_indexes=False, ): if config is None: config = {} if context is None: context = {} - super().__init__(axis_options, compressed_axes_options, grid_online_path, grid_local_directory) + super().__init__(axis_options, compressed_axes_options, grid_online_path, grid_local_directory, return_indexes) logging.info("Created an FDB datacube with options: " + str(axis_options)) @@ -324,12 +325,19 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range # now c are the leaves of the initial tree key_value_path = {c.axis.name: c.values} leaf_path["index"] = c.indexes + print(c.indexes) + print(key_value_path) + print(leaf_path) + print(self.unwanted_path) ax = c.axis (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) # TODO: change this to accommodate non consecutive indexes being compressed too current_idx[i].extend(key_value_path["values"]) + if self.return_indexes: + c.indexes = key_value_path["values"] + print(current_idx[i]) fdb_range_n[i].append(c) return (current_idx, fdb_range_n) diff --git a/polytope_feature/datacube/backends/xarray.py b/polytope_feature/datacube/backends/xarray.py index c1c7ee1be..c1dd1efca 100644 --- a/polytope_feature/datacube/backends/xarray.py +++ b/polytope_feature/datacube/backends/xarray.py @@ 
-17,8 +17,9 @@ def __init__( context=None, grid_online_path="", grid_local_directory="", + return_indexes=False, ): - super().__init__(axis_options, compressed_axes_options, grid_online_path, grid_local_directory) + super().__init__(axis_options, compressed_axes_options, grid_online_path, grid_local_directory, return_indexes) if axis_options is None: axis_options = {} diff --git a/polytope_feature/options.py b/polytope_feature/options.py index 402404823..05f08dba7 100644 --- a/polytope_feature/options.py +++ b/polytope_feature/options.py @@ -84,6 +84,7 @@ class Config(ConfigModel): alternative_axes: Optional[List[GribJumpAxesConfig]] = [] grid_online_path: Optional[str] = "" grid_local_directory: Optional[str] = "" + return_indexes: Optional[bool] = False class PolytopeOptions(ABC): @@ -99,5 +100,14 @@ def get_polytope_options(options): alternative_axes = config_options.alternative_axes grid_online_path = config_options.grid_online_path grid_local_directory = config_options.grid_local_directory - - return (axis_config, compressed_axes_config, pre_path, alternative_axes, grid_online_path, grid_local_directory) + return_indexes = config_options.return_indexes + + return ( + axis_config, + compressed_axes_config, + pre_path, + alternative_axes, + grid_online_path, + grid_local_directory, + return_indexes, + ) diff --git a/polytope_feature/polytope.py b/polytope_feature/polytope.py index 133e7528d..06bc76b56 100644 --- a/polytope_feature/polytope.py +++ b/polytope_feature/polytope.py @@ -73,6 +73,7 @@ def __init__( alternative_axes, grid_online_path, grid_local_directory, + return_indexes, ) = PolytopeOptions.get_polytope_options(options) self.datacube = Datacube.create( datacube, @@ -82,6 +83,7 @@ def __init__( alternative_axes, grid_online_path, grid_local_directory, + return_indexes, self.context, ) if engine_options == {}: diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index 74454e27a..a0cacd376 100644 --- 
a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -66,6 +66,9 @@ def test_fdb_datacube(self): assert len(result.leaves) == 3 assert len(result.leaves[0].result) == 3 + print("HERE") + print(result.leaves[0].indexes) + @pytest.mark.fdb def test_fdb_datacube_point(self): import pygribjump as gj diff --git a/tests/test_fdb_return_idx.py b/tests/test_fdb_return_idx.py new file mode 100644 index 000000000..535219a45 --- /dev/null +++ b/tests/test_fdb_return_idx.py @@ -0,0 +1,96 @@ +import pandas as pd +import pytest + +from polytope_feature.polytope import Polytope, Request +from polytope_feature.shapes import Box, Select + + +class TestSlicingFDBDatacube: + def setup_method(self, method): + # Polytope options for the FDB datacube: axis transformations, compressed axes and pre-path + self.options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + { + "axis_name": "date", + "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + }, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + "latitude", + "levtype", + "step", + "date", + "domain", + "expver", + "param", + "class", + "stream", + "type", + ], + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "type": "fc", "stream": "oper"}, + } + + @pytest.mark.fdb + def test_fdb_datacube(self): + import pygribjump as gj + + request = Request( + Select("step", [0]), + Select("levtype", ["sfc"]), + Select("date", [pd.Timestamp("20240103T0000")]), + Select("domain", ["g"]), + Select("expver", ["0001"]), + Select("param", ["167"]), + Select("class", ["od"]), + Select("stream", ["oper"]), 
+ Select("type", ["fc"]), + Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), + ) + self.fdbdatacube = gj.GribJump() + self.API = Polytope( + datacube=self.fdbdatacube, + options=self.options, + ) + result = self.API.retrieve(request) + result.pprint() + assert len(result.leaves) == 3 + assert len(result.leaves[0].result) == 3 + assert len(result.leaves[0].indexes) == 0 + + @pytest.mark.fdb + def test_fdb_datacube_return_idx(self): + import pygribjump as gj + + request = Request( + Select("step", [0]), + Select("levtype", ["sfc"]), + Select("date", [pd.Timestamp("20240103T0000")]), + Select("domain", ["g"]), + Select("expver", ["0001"]), + Select("param", ["167"]), + Select("class", ["od"]), + Select("stream", ["oper"]), + Select("type", ["fc"]), + Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), + ) + self.fdbdatacube = gj.GribJump() + self.options["return_indexes"] = True + self.API = Polytope( + datacube=self.fdbdatacube, + options=self.options, + ) + result = self.API.retrieve(request) + result.pprint() + assert len(result.leaves) == 3 + assert len(result.leaves[0].result) == 3 + assert len(result.leaves[0].indexes) == 3 From 7b6f9cc10eab06da3b0b80b77444645ae700dcbf Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 9 Oct 2025 11:08:16 +0200 Subject: [PATCH 2/2] clean up --- polytope_feature/datacube/backends/fdb.py | 5 ----- tests/test_ecmwf_oper_data_fdb.py | 3 --- 2 files changed, 8 deletions(-) diff --git a/polytope_feature/datacube/backends/fdb.py b/polytope_feature/datacube/backends/fdb.py index 3f866a328..80c7adaaa 100644 --- a/polytope_feature/datacube/backends/fdb.py +++ b/polytope_feature/datacube/backends/fdb.py @@ -325,10 +325,6 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range # now c are the leaves of the initial tree key_value_path = {c.axis.name: c.values} leaf_path["index"] = c.indexes - print(c.indexes) - print(key_value_path) - print(leaf_path) - print(self.unwanted_path) ax = c.axis (key_value_path, 
leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path @@ -337,7 +333,6 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range current_idx[i].extend(key_value_path["values"]) if self.return_indexes: c.indexes = key_value_path["values"] - print(current_idx[i]) fdb_range_n[i].append(c) return (current_idx, fdb_range_n) diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index a0cacd376..74454e27a 100644 --- a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -66,9 +66,6 @@ def test_fdb_datacube(self): assert len(result.leaves) == 3 assert len(result.leaves[0].result) == 3 - print("HERE") - print(result.leaves[0].indexes) - @pytest.mark.fdb def test_fdb_datacube_point(self): import pygribjump as gj