diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index 7a345c7f..03c4716e 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -7,10 +7,10 @@ on: jobs: test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] # Test on multiple Python versions + python-version: ["3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions steps: - name: Checkout repository @@ -20,8 +20,6 @@ jobs: uses: actions/setup-python@v4 with: - # until saxonche is available in 3.13 - # https://saxonica.plan.io/issues/6561 python-version: ${{ matrix.python-version }} - name: Display Python version run: python -c "import sys; print(sys.version)" diff --git a/benchmarks/core_profiles.py b/benchmarks/core_profiles.py index d7ab54cd..0bdeef8b 100644 --- a/benchmarks/core_profiles.py +++ b/benchmarks/core_profiles.py @@ -10,8 +10,6 @@ available_serializers, available_slicing_backends, create_dbentry, - factory, - hlis, ) N_SLICES = 32 @@ -26,13 +24,13 @@ def fill_slices(core_profiles, times): times: time values to fill a slice for """ core_profiles.ids_properties.homogeneous_time = 1 # HOMOGENEOUS - core_profiles.ids_properties.comment = "Generated for the IMAS-Python benchmark suite" + core_profiles.ids_properties.comment = ( + "Generated for the IMAS-Python benchmark suite" + ) core_profiles.ids_properties.creation_date = datetime.date.today().isoformat() core_profiles.code.name = "IMAS-Python ASV benchmark" core_profiles.code.version = imas.__version__ - core_profiles.code.repository = ( - "https://github.com/iterorganization/IMAS-Python" - ) + core_profiles.code.repository = "https://github.com/iterorganization/IMAS-Python" core_profiles.time = np.array(times) core_profiles.profiles_1d.resize(len(times)) @@ -50,13 +48,13 @@ def fill_slices(core_profiles, times): profiles_1d.ion.resize(len(ions)) profiles_1d.neutral.resize(len(ions)) for i, ion in enumerate(ions): - if hasattr(profiles_1d.ion[i], 'label'): + if hasattr(profiles_1d.ion[i], "label"): profiles_1d.ion[i].label = ion profiles_1d.neutral[i].label = ion - if hasattr(profiles_1d.ion[i], 'name'): + if hasattr(profiles_1d.ion[i], "name"): profiles_1d.ion[i].name = ion profiles_1d.neutral[i].name = ion - + # profiles_1d.ion[i].label = profiles_1d.neutral[i].label = ion profiles_1d.ion[i].z_ion = 1.0 profiles_1d.ion[i].neutral_index = profiles_1d.neutral[i].ion_index = i + 1 @@ -70,31 +68,31 @@ def fill_slices(core_profiles, times): class GetSlice: - params = [hlis, available_slicing_backends] - param_names = ["hli", "backend"] + params = [available_slicing_backends] + param_names = ["backend"] - def setup(self, hli, backend): - self.dbentry = create_dbentry(hli, backend) - core_profiles = factory[hli].core_profiles() + def setup(self, backend): + self.dbentry = create_dbentry(backend) + core_profiles = imas.IDSFactory().core_profiles() fill_slices(core_profiles, TIME) self.dbentry.put(core_profiles) - def time_get_slice(self, hli, backend): + def time_get_slice(self, backend): for t in TIME: self.dbentry.get_slice("core_profiles", t, imas.ids_defs.CLOSEST_INTERP) - def teardown(self, hli, backend): + def teardown(self, backend): if hasattr(self, "dbentry"): # imas + netCDF has no dbentry self.dbentry.close() class Get: - params = [hlis, available_backends] - param_names = ["hli", "backend"] + params = [available_backends] + param_names = ["backend"] setup = GetSlice.setup teardown = 
GetSlice.teardown - def time_get(self, hli, backend): + def time_get(self, backend): self.dbentry.get("core_profiles") @@ -103,8 +101,8 @@ class LazyGet: param_names = ["lazy", "backend"] def setup(self, lazy, backend): - self.dbentry = create_dbentry("imas", backend) - core_profiles = factory["imas"].core_profiles() + self.dbentry = create_dbentry(backend) + core_profiles = imas.IDSFactory().core_profiles() fill_slices(core_profiles, TIME) self.dbentry.put(core_profiles) @@ -118,75 +116,72 @@ def teardown(self, lazy, backend): class Generate: - params = [hlis] - param_names = ["hli"] + def setup(self): + self.core_profiles = imas.IDSFactory().core_profiles() - def setup(self, hli): - self.core_profiles = factory[hli].core_profiles() - - def time_generate(self, hli): + def time_generate(self): fill_slices(self.core_profiles, TIME) - def time_generate_slices(self, hli): + def time_generate_slices(self): for t in TIME: fill_slices(self.core_profiles, [t]) - def time_create_core_profiles(self, hli): - factory[hli].core_profiles() + def time_create_core_profiles(self): + imas.IDSFactory().core_profiles() class Put: - params = [["0", "1"], hlis, available_backends] - param_names = ["disable_validate", "hli", "backend"] + params = [["0", "1"], available_backends] + param_names = ["disable_validate", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.core_profiles = factory[hli].core_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put(self, disable_validate, backend): + with create_dbentry(backend) as dbentry: dbentry.put(self.core_profiles) class PutSlice: - params = [["0", "1"], hlis, available_slicing_backends] - param_names = ["disable_validate", "hli", "backend"] + params = [["0", "1"], available_slicing_backends] + param_names = ["disable_validate", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.core_profiles = factory[hli].core_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.core_profiles = imas.IDSFactory().core_profiles() os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put_slice(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put_slice(self, disable_validate, backend): + with create_dbentry(backend) as dbentry: for t in TIME: fill_slices(self.core_profiles, [t]) dbentry.put_slice(self.core_profiles) class Serialize: - params = [hlis, available_serializers] - param_names = ["hli", "serializer"] + params = [available_serializers] + param_names = ["serializer"] - def setup(self, hli, serializer): - self.core_profiles = factory[hli].core_profiles() + def setup(self, serializer): + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) - def time_serialize(self, hli, serializer): + def time_serialize(self, serializer): self.core_profiles.serialize(serializer) class Deserialize: - params = [hlis, available_serializers] - param_names = ["hli", "serializer"] + params = [available_serializers] + param_names = ["serializer"] - def setup(self, 
hli, serializer): - self.core_profiles = factory[hli].core_profiles() + def setup(self, serializer): + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) self.data = self.core_profiles.serialize(serializer) - self.core_profiles = factory[hli].core_profiles() + self.core_profiles = imas.IDSFactory().core_profiles() - def time_deserialize(self, hli, serializer): + def time_deserialize(self, serializer): self.core_profiles.deserialize(self.data) diff --git a/benchmarks/edge_profiles.py b/benchmarks/edge_profiles.py index cb78629f..f1ec7fd7 100644 --- a/benchmarks/edge_profiles.py +++ b/benchmarks/edge_profiles.py @@ -5,7 +5,7 @@ import imas -from .utils import available_backends, create_dbentry, factory, hlis +from .utils import available_backends, create_dbentry N_POINTS = 600 # number of random R,Z points N_LINES = 1200 # number of random lines in R,Z plane @@ -27,9 +27,7 @@ def fill_ggd(edge_profiles, times): edge_profiles.ids_properties.creation_date = datetime.date.today().isoformat() edge_profiles.code.name = "IMAS-Python ASV benchmark" edge_profiles.code.version = imas.__version__ - edge_profiles.code.repository = ( - "https://github.com/iterorganization/IMAS-Python" - ) + edge_profiles.code.repository = "https://github.com/iterorganization/IMAS-Python" # This GGD grid is not a valid description, but it's a good stress test for the # typical access patterns that exist in GGD grids @@ -124,45 +122,42 @@ def fill_ggd(edge_profiles, times): class Get: - params = [hlis, available_backends] - param_names = ["hli", "backend"] + params = [available_backends] + param_names = ["backend"] - def setup(self, hli, backend): - self.dbentry = create_dbentry(hli, backend) - edge_profiles = factory[hli].edge_profiles() + def setup(self, backend): + self.dbentry = create_dbentry(backend) + edge_profiles = imas.IDSFactory().edge_profiles() fill_ggd(edge_profiles, TIME) self.dbentry.put(edge_profiles) - def time_get(self, hli, backend): + def time_get(self, backend): self.dbentry.get("edge_profiles") - def teardown(self, hli, backend): + def teardown(self, backend): if hasattr(self, "dbentry"): # imas + netCDF has no dbentry self.dbentry.close() class Generate: - params = [hlis] - param_names = ["hli"] - - def time_generate(self, hli): - edge_profiles = factory[hli].edge_profiles() + def time_generate(self): + edge_profiles = imas.IDSFactory().edge_profiles() fill_ggd(edge_profiles, TIME) - def time_create_edge_profiles(self, hli): - factory[hli].edge_profiles() + def time_create_edge_profiles(self): + imas.IDSFactory().edge_profiles() class Put: - params = [["0", "1"], hlis, available_backends] - param_names = ["disable_validate", "hli", "backend"] + params = [["0", "1"], available_backends] + param_names = ["disable_validate", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.edge_profiles = factory[hli].edge_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.edge_profiles = imas.IDSFactory().edge_profiles() fill_ggd(self.edge_profiles, TIME) os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put(self, disable_validate, backend): + with create_dbentry(backend) as dbentry: dbentry.put(self.edge_profiles) diff --git a/benchmarks/utils.py b/benchmarks/utils.py index 
47ae2576..3ff30fd3 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -1,10 +1,10 @@ -import importlib import logging import uuid from pathlib import Path import imas import imas.exception +import imas.ids_defs # Backend constants HDF5 = "HDF5" @@ -56,28 +56,12 @@ def backend_exists(backend): backend for backend in available_backends if backend not in [ASCII, NETCDF] ] -hlis = ["imas"] -DBEntry = { - "imas": imas.DBEntry, -} -factory = { - "imas": imas.IDSFactory(), -} -available_serializers = [imas.ids_defs.ASCII_SERIALIZER_PROTOCOL] - +available_serializers = [ + imas.ids_defs.ASCII_SERIALIZER_PROTOCOL, + imas.ids_defs.FLEXBUFFERS_SERIALIZER_PROTOCOL, +] -def create_dbentry(hli, backend): - if backend == NETCDF: - if hli == "imas": # check if netcdf backend is available - try: - assert ( - imas.DBEntry._select_implementation("x.nc").__name__ - == "NCDBEntryImpl" - ) - except (AttributeError, AssertionError): - raise NotImplementedError( - "This version of IMAS-Python doesn't implement netCDF." - ) from None - path = Path.cwd() / f"DB-{hli}-{backend}" - return DBEntry[hli](create_uri(backend, path), "w") +def create_dbentry(backend): + path = Path.cwd() / f"DB-{backend}" + return imas.DBEntry(create_uri(backend, path), "w") diff --git a/conftest.py b/conftest.py index b7ab1fe4..51aaa4d4 100644 --- a/conftest.py +++ b/conftest.py @@ -70,6 +70,13 @@ def pytest_addoption(parser): "hdf5": HDF5_BACKEND, "mdsplus": MDSPLUS_BACKEND, } +try: + from imas.db_entry import DBEntry + from imas_core.exception import ImasCoreBackendException + DBEntry("imas:mdsplus?path=dummy", "r") +except ImasCoreBackendException as iex: + if "not available" in str(iex.message): + _BACKENDS.pop("mdsplus") try: diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index f99e24d2..0e7348c1 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,44 @@ Changelog ========= +What's new in IMAS-Python 2.1.0 +------------------------------- + +Build +''''' + +- update Python version support (remove 3.8 and 3.9, add 3.13) +- add dependency on `imas_core `__ + + +Improvements +'''''''''''' + +- :issue:`84`: improve `imas process-db-analysis` +- :issue:`71`: take into account identifier aliases (introduced in DD 4.1) +- :issue:`78`: disable *implicit* conversion when crossing a major version update +- improve integration of the UDA backend +- remove deprecated AL4 compatibility code +- :issue:`59`: convert name+identifier (DD3) into description+name (DD4) +- improve type hints (following PEP-585 and PEP-604) +- improve performance of IDS deepcopy +- :issue:`60`: improve `equilibrium` DD3->4 conversion by converting `boundary_separatrix` into `contour_tree` +- :issue:`22`: add a custom conversion example for the `em_coupling` IDS to the documentation + + +Bug fixes +''''''''' + +- fix test cases with coordinate validation issues +- :issue:`80`: fix `imas print` when using netCDF and imas_core is not present +- :issue:`61`: special DD3->4 rule to flip the sign of quantities missing the ``cocos_label_transform`` attribute in the DD +- :merge:`58`: fix unclear provenance capture +- :merge:`57`: fix 0D arrays from lazy loading with netCDF +- :issue:`55`: handle a missing case when converting 3.42->4 (_tor->_phi) + + + + What's new in IMAS-Python 2.0.1 ------------------------------- @@ -74,9 +112,9 @@ Bug fixes Dictionary 4.0.0 and 3.42.0. In other cases, the Data Dictionary version is now explicitly indicated. 
-- :issue:`IMAS-5560`: Fix a bug where IMASPy would not correctly recognize that +- IMAS-5560: Fix a bug where IMASPy would not correctly recognize that the UDA backend is used. -- :issue:`IMAS-5541`: Fix a bug when converting a closed contour to Data +- IMAS-5541: Fix a bug when converting a closed contour to Data Dictionary version 4.0.0. - Work around incorrect Data Dictionary 3.x metadata when converting ``flux_loop/flux`` in the ``magnetics`` IDS to Data Dictionary version 4.0.0. diff --git a/docs/source/cli.rst b/docs/source/cli.rst index df6db851..9147746d 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -121,8 +121,8 @@ process these files with ``imas process-db-analysis``. This will: variable). 2. These results are summarized in a table, showing per IDS: - - The number of data fields [#data_fields]_ that were filled in *any* of the - analyzed data entries. + - The number of data fields [#data_fields]_ that were filled in *any* occurrence of + the IDS in *any* of the analyzed data entries. - The total number of data fields [#data_fields]_ that the Data Dictionary defines for this IDS. - The percentage of fields filled. diff --git a/docs/source/conf.py b/docs/source/conf.py index 65f5e5f4..06f59e76 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,12 +39,12 @@ iter_projects = "https://github.com/iterorganization/" dd_url = urljoin(iter_projects, "imas-data-dictionary/") al_url = urljoin(iter_projects, "imas-core/") -issue_url = jira_url = "https://github.com/iterorganization/IMAS-Python/issues" # IMAS-Python repository_url = f"{iter_projects}/{src_project}/" blob_url = repository_url -mr_url = urljoin(repository_url, "/pulls") +issue_url = urljoin(repository_url, "issues/") +mr_url = urljoin(repository_url, "pull/") # Configuration of sphinx.ext.extlinks @@ -52,8 +52,8 @@ # unique name: (base URL, label prefix) extlinks = { "src": (blob_url + "%s", "%s"), - "issue": (issue_url + "%s", "%s"), - "merge": (mr_url + "%s", "!%s"), + "issue": (issue_url + "%s", "#%s"), + "merge": (mr_url + "%s", "#%s"), "dd": (dd_url + "%s", "%s"), "al": (al_url + "%s", "%s"), "pypa": ("https://packaging.python.org/%s", None), diff --git a/docs/source/courses/basic/analyze.rst b/docs/source/courses/basic/analyze.rst index 21a7c68b..97dd1ea3 100644 --- a/docs/source/courses/basic/analyze.rst +++ b/docs/source/courses/basic/analyze.rst @@ -25,15 +25,10 @@ can use the data. .. hint:: Use the ASCII data supplied with IMAS-Python for all exercises. It contains two IDSs (``equilibrium`` and ``core_profiles``) filled with data from three - time slices of ITER reference data. Two convenience methods are available in the - :mod:`imas.training` module to open the DBEntry for this training data. - - 1. :meth:`imas.training.get_training_db_entry()` returns an opened - ``imas.DBEntry`` object. Use this method if you want to use the IMAS-Python - interface. - 2. :meth:`imas.training.get_training_imas_db_entry()` returns an opened - ``imas.DBEntry`` object. Use this method if you want to use the Python Access - Layer interface. + time slices of ITER reference data. A convenience method is available in the + :mod:`imas.training` module to open the DBEntry for this training data: + :meth:`imas.training.get_training_db_entry()` returns an opened + ``imas.DBEntry`` object. Exercise 1 '''''''''' diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 00000000..41fa0388 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,14 @@ +.. 
_`IMAS-Python Examples`: + +IMAS-Python Examples +==================== + +Most IMAS-Python usage examples can be found throughout the documentation pages. On this +page we collect some examples that are too big or too generic to include in specific +pages. Currently this is a short list, but we expect that it will grow over time. + +.. toctree:: + :caption: IMAS-Python examples + :maxdepth: 1 + + examples/custom_conversion_em_coupling diff --git a/docs/source/examples/custom_conversion_em_coupling.py b/docs/source/examples/custom_conversion_em_coupling.py new file mode 100644 index 00000000..5b5b8628 --- /dev/null +++ b/docs/source/examples/custom_conversion_em_coupling.py @@ -0,0 +1,168 @@ +"""IMAS-Python example for custom conversion logic. + +This example script loads a Data Entry (in Data Dictionary 3.38.1) created by +DINA and converts the em_coupling IDS to DD 4.0.0. +""" + +import imas +from imas.ids_defs import IDS_TIME_MODE_INDEPENDENT + +input_uri = "imas:hdf5?path=/work/imas/shared/imasdb/ITER_SCENARIOS/3/105013/1" +# An error is reported when there's already data at the output_uri! +output_uri = "imas:hdf5?path=105013-1-converted" +target_dd_version = "4.0.0" + + +# Mapping of DD 3.38.1 em_coupling data to DD 4.0.0 +# Map the name of the matrix in DD 3.38.1 to the identifier and coordinate URIs +COUPLING_MAPS = { + "field_probes_active": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_active/coil", + ), + "field_probes_grid": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_plasma/element", + ), + "field_probes_passive": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_passive/loop", + ), + "mutual_active_active": dict( + coupling_quantity=1, + rows_uri="#pf_active/coil", + columns_uri="#pf_active/coil", + ), + "mutual_grid_active": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_active/coil", + ), + "mutual_grid_grid": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_plasma/element", + ), + "mutual_grid_passive": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_passive/loop", + ), + "mutual_loops_active": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_active/coil", + ), + "mutual_loops_passive": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_passive/loop", + ), + "mutual_loops_grid": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_plasma/element", + ), + "mutual_passive_active": dict( + coupling_quantity=1, + rows_uri="#pf_passive/loop", + columns_uri="#pf_active/coil", + ), + "mutual_passive_passive": dict( + coupling_quantity=1, + rows_uri="#pf_passive/loop", + columns_uri="#pf_passive/loop", + ), +} + + +with ( + imas.DBEntry(input_uri, "r") as entry, + imas.DBEntry(output_uri, "x", dd_version=target_dd_version) as out, +): + print("Loaded IMAS Data Entry:", input_uri) + + print("This data entry contains the following IDSs:") + filled_idss = [] + for idsname in entry.factory.ids_names(): + occurrences = entry.list_all_occurrences(idsname) + if occurrences: + filled_idss.append(idsname) + print(f"- {idsname}, occurrences: {occurrences}") + print("") + + # Load and convert all IDSs (except em_coupling) with imas.convert_ids() + # N.B. 
we know that the input URI doesn't have multiple occurrences, so + we do not need to worry about them: + for idsname in filled_idss: + if idsname == "em_coupling": + continue + + print(f"Loading IDS: {idsname}...") + ids = entry.get(idsname, autoconvert=False) + print(f"Converting IDS {idsname} to DD {target_dd_version}...") + ids4 = imas.convert_ids( + ids, + target_dd_version, + provenance_origin_uri=input_uri, + ) + print(f"Storing IDS {idsname} to output data entry...") + out.put(ids4) + + print("Conversion for em_coupling:") + emc = entry.get("em_coupling", autoconvert=False) + print("Using standard convert, this may log warnings about discarding data") + emc4 = imas.convert_ids( + emc, + target_dd_version, + provenance_origin_uri=input_uri, + ) + + print("Starting custom conversion of the coupling matrices") + for matrix_name, mapping in COUPLING_MAPS.items(): + # Skip empty matrices + if not emc[matrix_name].has_value: + continue + + # Allocate a new coupling_matrix AoS element + emc4.coupling_matrix.resize(len(emc4.coupling_matrix) + 1, keep=True) + # And fill it: + emc4.coupling_matrix[-1].name = matrix_name + # Assigning an integer to the identifier will automatically fill the + # index/name/description. See documentation: + # https://imas-python.readthedocs.io/en/latest/identifiers.html + emc4.coupling_matrix[-1].quantity = mapping["coupling_quantity"] + emc4.coupling_matrix[-1].rows_uri = [mapping["rows_uri"]] + emc4.coupling_matrix[-1].columns_uri = [mapping["columns_uri"]] + emc4.coupling_matrix[-1].data = emc[matrix_name].value + # N.B. the original data has no error_upper/error_lower so we skip these + # Store em_coupling IDS + out.put(emc4) + + print("Generating pf_plasma IDS...") + # N.B. This logic is specific to DINA + # Create a new pf_plasma IDS and set basic properties + pf_plasma = out.factory.pf_plasma() + pf_plasma.ids_properties.homogeneous_time = IDS_TIME_MODE_INDEPENDENT + pf_plasma.ids_properties.comment = "PF Plasma generated from equilibrium" + + equilibrium = entry.get("equilibrium", lazy=True, autoconvert=False) + r = equilibrium.time_slice[0].profiles_2d[0].grid.dim1 + z = equilibrium.time_slice[0].profiles_2d[0].grid.dim2 + nr, nz = len(r), len(z) + # Generate a pf_plasma element for each grid point: + pf_plasma.element.resize(nr * nz) + for ir, rval in enumerate(r): + for iz, zval in enumerate(z): + element = pf_plasma.element[ir * nz + iz] # flatten (ir, iz) into the 1D array + element.geometry.geometry_type = 2 # rectangle + element.geometry.rectangle.r = rval + element.geometry.rectangle.z = zval + # Store pf_plasma IDS + out.put(pf_plasma) + +print("Conversion finished") diff --git a/docs/source/examples/custom_conversion_em_coupling.rst b/docs/source/examples/custom_conversion_em_coupling.rst new file mode 100644 index 00000000..96f6ec53 --- /dev/null +++ b/docs/source/examples/custom_conversion_em_coupling.rst @@ -0,0 +1,13 @@ +Custom conversion of the ``em_coupling`` IDS +============================================ + +The ``em_coupling`` IDS changed significantly between Data Dictionary 3.x and Data +Dictionary 4.x. These changes are not covered by the automatic conversions of +:py:meth:`imas.convert_ids ` because the required mapping is too +code-specific. + +Instead, this page shows an example that converts a DINA dataset from DD 3.38.1 to DD +4.0.0; it can be used as a starting point for converting output data from other codes +as well. + +.. 
literalinclude:: custom_conversion_em_coupling.py diff --git a/docs/source/identifiers.rst b/docs/source/identifiers.rst index 312749e1..408c7abe 100644 --- a/docs/source/identifiers.rst +++ b/docs/source/identifiers.rst @@ -11,13 +11,16 @@ enumerated list of options for defining, for example: a neutron, or a photon. - Plasma heating may come from neutral beam injection, electron cyclotron heating, ion cyclotron heating, lower hybrid heating, alpha particles. +- These may have alternative naming conventions supported through aliases + (e.g., "235U" and "U_235" for Uranium 235). -Identifiers are a list of possible valid labels. Each label has three +Identifiers are a list of possible valid labels. Each label has up to four representations: 1. An index (integer) 2. A name (short string) 3. A description (long string) +4. A list of aliases (short strings) Identifiers in IMAS-Python @@ -44,6 +47,15 @@ the available identifiers is stored as ``imas.identifiers.identifiers``. print(csid.total.index) print(csid.total.description) + # Access identifiers with aliases (when available) + mid = imas.identifiers.materials_identifier + print(mid["235U"].name) # Access by canonical name + print(mid["U_235"].name) # Access by alias + + # Both lookups return the same object + assert mid["235U"] is mid["U_235"] + assert mid["235U"] is mid.U_235 + # Item access is also possible print(identifiers["edge_source_identifier"]) @@ -64,8 +76,8 @@ Assigning identifiers in IMAS-Python IMAS-Python implements smart assignment of identifiers. You may assign an identifier enum value (for example ``imas.identifiers.core_source_identifier.total``), a -string (for example ``"total"``) or an integer (for example ``"1"``) to an -identifier structure (for example ``core_profiles.source[0].identifier``) to set +string (for example ``"total"`` or its alias), or an integer (for example ``1``) +to an identifier structure (for example ``core_profiles.source[0].identifier``) to set all three child nodes ``name``, ``index`` and ``description`` in one go. See below example: @@ -86,6 +98,20 @@ below example: # 3. Assign an integer. This looks up the index in the identifier enum: core_sources.source[0].identifier = 1 + # Identifiers can also be assigned with an (old) alias name for backward compatibility: + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mid = imas.identifiers.materials_identifier + # Assign using canonical name + mat.names[0] = "235U" + # Or assign using alias (equivalent to above) + mat.names[0] = mid["U_235"].name + mat.names[0] = mid.U_235.name + # Inspect the contents of the structure imas.util.inspect(core_sources.source[0].identifier) @@ -101,6 +127,65 @@ below example: imas.util.inspect(core_sources.source[1].identifier) +Identifier aliases +------------------ + +Some identifiers have aliases defined in the Data Dictionary. Aliases are former +names that are kept for backward compatibility after a change, and they also support +multiple naming conventions. An identifier can have any number of aliases (stored as +a comma-separated list in the Data Dictionary). + +Aliases can be accessed in the same ways as canonical names, and all aliases for an +identifier point to the same object. 
+ +Names and aliases that begin with a number (e.g. ``235U``) cannot be accessed using +dot notation (e.g. ``materials_identifier.235U``) due to Python's syntax restrictions. +Instead, they must be accessed using dictionary-style indexing, for example +``materials_identifier["235U"]``. + +.. code-block:: python + :caption: Working with identifier aliases + + import imas + + # Get the materials identifier, which has some aliases defined + mid = imas.identifiers.materials_identifier + + # Access by canonical name + uranium235_by_name = mid["235U"] + print(f"Name: {uranium235_by_name.name}") + print(f"Aliases: {uranium235_by_name.aliases}") # List of all aliases + print(f"First alias: {uranium235_by_name.alias}") # First alias for compatibility + print(f"Index: {uranium235_by_name.index}") + print(f"Description: {uranium235_by_name.description}") + + # Access by any alias - all return the same object + uranium235_by_alias1 = mid["U_235"] + uranium235_by_alias2 = mid["Uranium_235"] + print(f"Same objects: {uranium235_by_name is uranium235_by_alias1 is uranium235_by_alias2}") + + # You can also use attribute access for aliases (when valid Python identifiers) + uranium235_by_attr = mid.U_235 + print(f"Same object: {uranium235_by_name is uranium235_by_attr}") + + # When assigning to IDS structures, aliases work as follows + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # These assignments are all equivalent: + mat.names[0] = "235U" # canonical name + mat.names[0] = mid["235U"].name # canonical name via the enum + mat.names[0] = mid.U_235.name # canonical name via alias (attribute access) + mat.names[0] = mid["U_235"].name # canonical name via alias (item access) + Compare identifiers ------------------- @@ -108,11 +193,12 @@ Identifier structures can be compared against the identifier enum as well. They compare equal when: 1. ``index`` is an exact match -2. ``name`` is an exact match, or ``name`` is not filled in the IDS node +2. ``name`` is an exact match, or ``name`` matches an alias, or ``name`` is not filled in the IDS node The ``description`` does not have to match with the Data Dictionary definition, but a warning is logged if the description in the IDS node does not match with -the Data Dictionary description: +the Data Dictionary description. The comparison also takes aliases into account, +so an identifier will match both its canonical name and any defined alias: .. code-block:: python :caption: Comparing identifiers @@ -139,6 +225,15 @@ the Data Dictionary description: >>> core_sources.source[0].identifier.name = "totalX" >>> core_sources.source[0].identifier == csid.total False + >>> # Alias comparison example with materials identifier + >>> mid = imas.identifiers.materials_identifier + >>> cxr = imas.IDSFactory().camera_x_rays() + >>> mat = cxr.filter_window.material + >>> mat.index = 20 + >>> mat.name = "U_235" # Using alias + >>> # Compares equal to the canonical identifier even though the name is an alias + >>> mat == mid["235U"] + True .. seealso:: diff --git a/docs/source/index.rst b/docs/source/index.rst index 7aa06277..8388f5b5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,6 +51,7 @@ Manual cli netcdf changelog + examples .. 
toctree:: :caption: IMAS-Python training courses diff --git a/docs/source/lazy_loading.rst b/docs/source/lazy_loading.rst index 9dda19e0..745df066 100644 --- a/docs/source/lazy_loading.rst +++ b/docs/source/lazy_loading.rst @@ -90,4 +90,13 @@ Lazy loading of data may speed up your programs, but also comes with some limita more efficient to do a full :code:`get()` or :code:`get_slice()` when you intend to use most of the data stored in an IDS. 5. When using IMAS-Python with remote data access (i.e. the UDA backend), a full - :code:`get()` or :code:`get_slice()` is more efficient than lazy loading. + :code:`get()` or :code:`get_slice()` may be more efficient than using lazy loading. + + It is recommended to add the parameter ``;cache_mode=none`` [#cache_mode_none]_ to + the end of a UDA IMAS URI when using lazy loading: otherwise the UDA backend will + still load the full IDS from the remote server. + + +.. [#cache_mode_none] The option ``cache_mode=none`` requires IMAS Core version 5.5.1 or + newer, and a remote UDA server with `IMAS UDA-Plugins + `__ version 1.7.0 or newer. diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index bef1fe54..ae1175fd 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -23,7 +23,6 @@ example: factory_3_32_0 = imas.IDSFactory("3.32.0") # Use DD version 3.32.0 # Will write IDSs to the backend in DD version 3.32.0 - dbentry = imas.DBEntry(imas.ids_defs.HDF5_BACKEND, "TEST", 10, 2, version="3.32.0") - dbentry.create() + dbentry = imas.DBEntry("imas:hdf5?path=dd3.32.0-output/", "w", dd_version="3.32.0") .. seealso:: :ref:`multi-dd training` @@ -117,6 +117,25 @@ Explicit conversion versions, the corresponding data is not copied. IMAS-Python provides logging to indicate when this happens. +.. rubric:: DD3 -> DD4 special rule: name + identifier -> description + name (:issue:`59`) + +IMAS-Python implements an additional explicit conversion rule (see :issue:`59`) to improve +migration of Machine Description parts of IDSs when moving from major version 3 to 4. +The rule targets simple sibling pairs on the same parent that provide both a ``name`` +and an ``identifier`` field and that are *not* part of an identifier structure (the +parent must not also have an ``index`` sibling). When applicable, the rule performs the +following renames during explicit DD3->DD4 conversion: + +- DD3: ``parent/name`` -> DD4: ``parent/description`` +- DD3: ``parent/identifier`` -> DD4: ``parent/name`` + +The conversion is applied only when the corresponding target fields exist in the +DD4 definition and when no earlier mapping already covers the same paths. It +is performed by the explicit conversion machinery (for example via +``imas.convert_ids`` or explicit conversion through a ``DBEntry``) and is not guaranteed +to be applied by automatic conversion when reading from or writing to a backend. + +In some cases, like the one above, the reverse conversion is also allowed (DD 4.0.0 -> 3.41.1). .. _`Supported conversions`: @@ -147,6 +166,8 @@ explicit conversion mechanisms. Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No + Convert name + identifier -> description + name, Yes, Yes + Convert equilibrium ``boundary\_[secondary\_]separatrix`` to ``contour_tree`` [#contourtree]_, Yes, No .. [#rename] Quantities which have been renamed between the two DD versions. 
For example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS, @@ -186,6 +207,76 @@ explicit conversion mechanisms. interpolation otherwise. See also: https://github.com/iterorganization/IMAS-Python/issues/21. +.. [#contourtree] Fills the `contour_tree + `__ + in the ``equilibrium`` IDS based on data in the ``boundary_separatrix`` and + ``boundary_secondary_separatrix`` structures from DD3. See also: + https://github.com/iterorganization/IMAS-Python/issues/60. + + +.. _`Loading IDSs from a different major version`: + +Loading IDSs from a different major version +------------------------------------------- + +If you try to load an IDS that was stored in a different major version of the DD than +you are using, IMAS-Python will raise a runtime error, for example: + +.. code-block:: text + + On-disk data is stored in DD 3.39.1 which has a different major version than the + requested DD version (4.0.0). IMAS-Python will not automatically convert this + data for you. + +You need to explicitly convert the data, which you can do as follows: + +.. code-block:: python + + # Opened data entry + entry = imas.DBEntry(...) + + # A plain get or get_slice will raise a RuntimeError when the data is stored in + # a different major version of the DD: + # entry.get("equilibrium") + + # So instead, we'll load the IDS in the DD version in which it is stored on disk + tmp_eq = entry.get("equilibrium", autoconvert=False) + # And explicitly convert it to the target version + equilibrium = imas.convert_ids(tmp_eq, entry.dd_version) + + +.. _`Storing IDSs with a different major version`: + +Storing IDSs with a different major version +------------------------------------------- + +If you try to put an IDS that was created for a different major version of the DD than +the Data Entry you want to store it in, IMAS-Python will raise a runtime error, for +example: + +.. code-block:: text + + Provided IDS uses DD 3.42.2 which has a different major version than the Data + Entry (4.0.0). IMAS-Python will not automatically convert this data for you. + +You need to explicitly convert the data, which you can do as follows: + +.. code-block:: python + + # IDS with data, in DD 3.42.2 + equilibrium = imas.IDSFactory("3.42.2").equilibrium() + ... + + # Data Entry uses DD 4.0.0 + with imas.DBEntry(uri, "w", dd_version="4.0.0") as entry: + # This put would raise a runtime error, because the major version of the IDS + # and the DBEntry don't match: + # entry.put(equilibrium) + + # So instead, we'll explicitly convert the IDS and put that one + entry.put(imas.convert_ids(equilibrium, entry.dd_version)) + + + .. _`DD background`: Background information diff --git a/imas/backends/imas_core/al_context.py b/imas/backends/imas_core/al_context.py index 1685e384..ede33bac 100644 --- a/imas/backends/imas_core/al_context.py +++ b/imas/backends/imas_core/al_context.py @@ -71,10 +71,7 @@ def global_action(self, path: str, rwmode: int, datapath: str = "") -> "ALContex Returns: The created context. 
""" - args = [self.ctx, path, rwmode] - if datapath: # AL4 compatibility: datapath arg was added in AL5 - args.append(datapath) - status, ctx = ll_interface.begin_global_action(*args) + status, ctx = ll_interface.begin_global_action(self.ctx, path, rwmode, datapath) if status != 0: raise LowlevelError("global_action", status) return ALContext(ctx) diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index b3240ebd..dad5019b 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -5,30 +5,29 @@ import logging import os from collections import deque +import re from typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse +from packaging.version import Version + +import imas from imas.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imas.db_entry import DBEntryImpl from imas.exception import DataEntryException, LowlevelError from imas.ids_convert import NBCPathMap, dd_version_map_from_factories from imas.ids_defs import ( - ASCII_BACKEND, CHAR_DATA, CLOSE_PULSE, CREATE_PULSE, ERASE_PULSE, FORCE_CREATE_PULSE, FORCE_OPEN_PULSE, - HDF5_BACKEND, IDS_TIME_MODE_UNKNOWN, IDS_TIME_MODES, INTEGER_DATA, - MDSPLUS_BACKEND, - MEMORY_BACKEND, OPEN_PULSE, READ_OP, - UDA_BACKEND, UNDEFINED_INTERP, UNDEFINED_TIME, WRITE_OP, @@ -40,16 +39,9 @@ from .al_context import ALContext, LazyALContext from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface -from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .mdsplus_model import mdsplus_model_dir from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml -_BACKEND_NAME = { - ASCII_BACKEND: "ascii", - HDF5_BACKEND: "hdf5", - MEMORY_BACKEND: "memory", - MDSPLUS_BACKEND: "mdsplus", - UDA_BACKEND: "uda", -} _OPEN_MODES = { "r": OPEN_PULSE, "a": FORCE_OPEN_PULSE, @@ -71,13 +63,26 @@ def require_imas_available(): class ALDBEntryImpl(DBEntryImpl): """DBEntry implementation using imas_core as a backend.""" - """Map to the expected open_pulse (AL4) / begin_dataentry_action (AL5) argument.""" + def __init__(self, uri: str, mode: int, factory: IDSFactory): + # Setup backend and lowlevel Access Layer: + backend = urlparse(uri).path.lower().lstrip("/") + self._setup_backend(backend, mode, factory) + status, ctx = ll_interface.begin_dataentry_action(uri, mode) + if status != 0: + raise LowlevelError("opening/creating data entry", status) - def __init__(self, backend: str, ctx: ALContext, factory: IDSFactory): self.backend = backend - self._db_ctx = ctx + self._db_ctx = ALContext(ctx) self._ids_factory = factory self._lazy_ctx_cache: Deque[ALContext] = deque() + self._uri = uri + + # Parse query options, mimic logic in AL-Core instead of using + # urllib.parse.parse_qs(..). 
See https://github.com/jholloc/simple-uri-parser + self._querydict = {} + for option in re.split("[&;?]", urlparse(self._uri).query): + name, _, value = option.partition("=") + self._querydict[name] = value @classmethod def from_uri(cls, uri: str, mode: str, factory: IDSFactory) -> "ALDBEntryImpl": @@ -85,7 +90,7 @@ def from_uri(cls, uri: str, mode: str, factory: IDSFactory) -> "ALDBEntryImpl": if mode not in _OPEN_MODES: modes = list(_OPEN_MODES) raise ValueError(f"Unknown mode {mode!r}, was expecting any of {modes}") - return cls._from_uri(uri, _OPEN_MODES[mode], factory) + return cls(uri, _OPEN_MODES[mode], factory) @classmethod def from_pulse_run( @@ -108,60 +113,18 @@ def from_pulse_run( data_version = data_version or factory.dd_version options = options if options else "" - if ll_interface._al_version.major >= 5: - # We need a URI for AL 5 or later, construct from legacy parameters - status, uri = ll_interface.build_uri_from_legacy_parameters( - backend_id, pulse, run, user_name, db_name, data_version, options - ) - if status != 0: - raise LowlevelError("build URI from legacy parameters", status) - - return cls._from_uri(uri, mode, factory) - - else: - # AL4 legacy support: - backend = _BACKEND_NAME.get(backend_id, "") - cls._setup_backend(backend, mode, factory, user_name, db_name, run) - - status, ctx = ll_interface.begin_pulse_action( - backend_id, pulse, run, user_name, db_name, data_version - ) - if status != 0: - raise LowlevelError("begin pulse action", status) - - status = ll_interface.open_pulse(ctx, mode, options) - if status != 0: - raise LowlevelError("opening/creating data entry", status) - - return cls(backend, ALContext(ctx), factory) - - @classmethod - def _from_uri(cls, uri: str, mode: int, factory: IDSFactory) -> "ALDBEntryImpl": - """Helper method to actually open/create the dataentry.""" - backend = urlparse(uri).path.lower().lstrip("/") - cls._setup_backend(backend, mode, factory) - - status, ctx = ll_interface.begin_dataentry_action(uri, mode) + # Construct URI from legacy parameters + status, uri = ll_interface.build_uri_from_legacy_parameters( + backend_id, pulse, run, user_name, db_name, data_version, options + ) if status != 0: - raise LowlevelError("opening/creating data entry", status) + raise LowlevelError("build URI from legacy parameters", status) - return cls(backend, ALContext(ctx), factory) + return cls(uri, mode, factory) @classmethod - def _setup_backend( - cls, - backend: str, - mode: int, - factory: IDSFactory, - user_name: str = "", - db_name="", - run=1, - ) -> None: - """Custom logic for preparing some backends. - - Note: user_name, db_name and run are only used for AL 4.x, they can be - omitted when using AL 5 or later. 
- """ + def _setup_backend(cls, backend: str, mode: int, factory: IDSFactory) -> None: + """Custom logic for preparing some backends.""" if backend == "mdsplus": # MDSplus models: if mode != OPEN_PULSE: @@ -170,22 +133,6 @@ def _setup_backend( if ids_path: os.environ["ids_path"] = ids_path - if ll_interface._al_version.major == 4: - # Ensure the data directory exists - # Note: MDSPLUS model directory only uses the major version component of - # IMAS_VERSION, so we'll take the first character of IMAS_VERSION: - version = factory.version[0] - ensure_data_dir(user_name, db_name, version, run) - - elif backend == "hdf5": - pass # nothing to set up - - elif backend == "memory": - pass # nothing to set up - - elif backend == "ascii": - pass # nothing to set up - elif backend == "uda": # Set IDSDEF_PATH to point the UDA backend to the selected DD version idsdef_path = None @@ -193,7 +140,6 @@ def _setup_backend( if factory._xml_path is not None: # Factory was constructed with an explicit XML path, point UDA to that: idsdef_path = factory._xml_path - elif "IMAS_PREFIX" in os.environ: # Check if UDA can use the IDSDef.xml stored in $IMAS_PREFIX/include/ idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" @@ -203,15 +149,9 @@ def _setup_backend( if idsdef_path is None: # Extract XML from the DD zip and point UDA to it idsdef_path = extract_idsdef(factory.version) - os.environ["IDSDEF_PATH"] = idsdef_path - logger.warning( - "The UDA backend is not tested with " - "IMAS-Python and may not work properly. " - "Please raise any issues you find." - ) - elif backend == "flexbuffers": + elif backend in ["hdf5", "memory", "ascii", "flexbuffers"]: pass # nothing to set up else: @@ -249,6 +189,8 @@ def get( raise RuntimeError("Database entry is not open.") if lazy and self.backend == "ascii": raise RuntimeError("Lazy loading is not supported by the ASCII backend.") + if self.backend == "uda": + self._check_uda_warnings(lazy) # Mixing contexts can be problematic, ensure all lazy contexts are closed: self._clear_lazy_ctx_cache() @@ -339,6 +281,15 @@ def put(self, ids: IDSToplevel, occurrence: int, is_slice: bool) -> None: # Create a version conversion map, if needed nbc_map = None if ids._version != self._ids_factory._version: + if ids._version.split(".")[0] != self._ids_factory._version.split(".")[0]: + raise RuntimeError( + f"Provided IDS uses DD {ids._version} which has a different major " + f"version than the Data Entry ({self._ids_factory._version}). " + "IMAS-Python will not automatically convert this data for you." + "See the documentation for more details and fixes: " + f"{imas.PUBLISHED_DOCUMENTATION_ROOT}" + "/multi-dd.html#storing-idss-with-a-different-major-version" + ) ddmap, source_is_older = dd_version_map_from_factories( ids_name, ids._parent, self._ids_factory ) @@ -405,3 +356,28 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: "Access Layer 5.1 or newer is required." ) from None return occurrence_list + + def _check_uda_warnings(self, lazy: bool) -> None: + """Various checks / warnings for the UDA backend.""" + cache_mode = self._querydict.get("cache_mode") + if lazy and cache_mode != "none": + # cache_mode=none requires imas core 5.5.1 or newer, and a recent enough UDA + # server plugin (which we cannot check...) + cache_mode_hint = "" + if ll_interface._al_version >= Version("5.5.1"): + cache_mode_hint = ( + "\nYou may add the parameter ';cache_mode=none' to the IMAS URI " + "to avoid loading all of the data from the remote server." 
+ ) + logger.warning( + "The UDA backend will load all IDS data from the remote server. " + "Lazy loading with the UDA backend may therefore still be slow.%s", + cache_mode_hint, + ) + + if cache_mode == "none" and ll_interface._al_version < Version("5.5.1"): + logger.warning( + "UDA option 'cache_mode=none' may not work correctly with " + "IMAS Core version %s.", + ll_interface._al_version, + ) diff --git a/imas/backends/imas_core/imas_interface.py b/imas/backends/imas_core/imas_interface.py index 6e463302..8fa3963b 100644 --- a/imas/backends/imas_core/imas_interface.py +++ b/imas/backends/imas_core/imas_interface.py @@ -6,6 +6,7 @@ This module tries to abstract away most API incompatibilities between the supported Access Layer versions (for example the rename of _ual_lowlevel to _al_lowlevel). """ + import inspect import logging @@ -31,7 +32,7 @@ has_imas = False imasdef = None lowlevel = None - logger.critical( + logger.warning( "Could not import 'imas_core': %s. Some functionality is not available.", exc, ) @@ -61,9 +62,6 @@ class LowlevelInterface: - If the lowlevel drops methods, we need to update the implementation of the method to provide a proper error message or a workaround. - - Renamed methods (if this will ever happen) are perhaps best handled in the - ``__init__`` by providing a mapping of new to old name, so far this was only - relevant for the ``ual_`` to ``al_`` rename. """ def __init__(self, lowlevel): @@ -84,23 +82,13 @@ def __init__(self, lowlevel): # Introduced after 5.0.0 self._al_version_str = self._lowlevel.get_al_version() self._al_version = Version(self._al_version_str) - elif hasattr(lowlevel, "al_read_data"): - # In AL 5.0.0, all `ual_` methods were renamed to `al_` + else: self._al_version_str = "5.0.0" self._al_version = Version(self._al_version_str) - else: - # AL 4, don't try to determine in more detail - self._al_version_str = "4.?.?" - self._al_version = Version("4") - public_methods.remove("close_pulse") - if self._al_version < Version("5"): - method_prefix = "ual_" - else: - method_prefix = "al_" # Overwrite all of our methods that are implemented in the lowlevel for method in public_methods: - ll_method = getattr(lowlevel, method_prefix + method, None) + ll_method = getattr(lowlevel, f"al_{method}", None) if ll_method is not None: setattr(self, method, ll_method) @@ -115,24 +103,10 @@ def _minimal_version(self, minversion): f"but the current version is {self._al_version_str}" ) - # AL 4 lowlevel API - - def begin_pulse_action(self, backendID, shot, run, user, tokamak, version): - # Removed in AL5, compatibility handled in DBEntry - raise LLInterfaceError(f"{__name__} is not implemented") - - def open_pulse(self, pulseCtx, mode, options): - # Removed in AL5, compatibility handled in DBEntry - raise LLInterfaceError(f"{__name__} is not implemented") - def close_pulse(self, pulseCtx, mode): - # options argument (mandatory in AL4) was removed in AL5 - # This method is overwritten in AL5, but for AL4 we need to do this: - return lowlevel.ual_close_pulse(pulseCtx, mode, None) + raise LLInterfaceError(f"{__name__} is not implemented") - def begin_global_action(self, pulseCtx, dataobjectname, rwmode, datapath=""): - # datapath was added in AL5 to support more efficient partial_get in the - # UDA backend. TODO: figure out if this is useful for lazy loading. 
+ def begin_global_action(self, pulseCtx, dataobjectname, rwmode, datapath): raise LLInterfaceError(f"{__name__} is not implemented") def begin_slice_action(self, pulseCtx, dataobjectname, rwmode, time, interpmode): diff --git a/imas/backends/imas_core/mdsplus_model.py b/imas/backends/imas_core/mdsplus_model.py index 3c91cefb..c5f09e29 100644 --- a/imas/backends/imas_core/mdsplus_model.py +++ b/imas/backends/imas_core/mdsplus_model.py @@ -364,25 +364,3 @@ def jTraverser_jar() -> Path: return jar_path else: raise MDSPlusModelError("jTraverser.jar not found. Is MDSplus-Java available?") - - -def ensure_data_dir(user: str, tokamak: str, version: str, run: int) -> None: - """Ensure that a data dir exists with a similar algorithm that - the MDSplus backend uses to set the data path. - See also mdsplus_backend.cpp:751 (setDataEnv)""" - if user == "public": - if "IMAS_HOME" not in os.environ: - raise RuntimeError( - "Environment variable IMAS_HOME must be set to access " - "the public database." - ) - dbdir = Path(os.environ["IMAS_HOME"]) / "shared" / "imasdb" / tokamak / version - elif user[0] == "/": - dbdir = Path(user) / tokamak / version - else: - dbdir = Path.home() / "public" / "imasdb" / tokamak / version - - # Check subfolder based on run - assert 0 <= run <= 99_999 - index = run // 10_000 - (dbdir / str(index)).mkdir(parents=True, exist_ok=True) diff --git a/imas/backends/netcdf/nc2ids.py b/imas/backends/netcdf/nc2ids.py index 306c128e..1b1dbfe8 100644 --- a/imas/backends/netcdf/nc2ids.py +++ b/imas/backends/netcdf/nc2ids.py @@ -366,9 +366,12 @@ def get_child(self, child): if value is not None: if isinstance(value, np.ndarray): - # Convert the numpy array to a read-only view - value = value.view() - value.flags.writeable = False + if value.ndim == 0: # Unpack 0D numpy arrays: + value = value.item() + else: + # Convert the numpy array to a read-only view + value = value.view() + value.flags.writeable = False # NOTE: bypassing IDSPrimitive.value.setter logic child._IDSPrimitive__value = value diff --git a/imas/command/cli.py b/imas/command/cli.py index 5e18d008..a270d834 100644 --- a/imas/command/cli.py +++ b/imas/command/cli.py @@ -102,7 +102,6 @@ def print_ids(uri, ids, occurrence, print_all): ids Name of the IDS to print (e.g. "core_profiles"). occurrence Which occurrence to print (defaults to 0). """ - min_version_guard(Version("5.0")) setup_rich_log_handler(False) with DBEntry(uri, "r") as dbentry: diff --git a/imas/command/db_analysis.py b/imas/command/db_analysis.py index 8f262e27..f8960858 100644 --- a/imas/command/db_analysis.py +++ b/imas/command/db_analysis.py @@ -7,6 +7,8 @@ import re import readline import sys +from csv import writer as csvwriter +from collections import Counter, defaultdict from dataclasses import dataclass, field from pathlib import Path from typing import Dict, Iterable, List, Optional @@ -139,12 +141,70 @@ def ids_info(idsfile: Path): } +@dataclass +class _PathUsage: + num_occurrences: int = 0 + path_counter: Counter = field(default_factory=Counter) + + +def _write_usage_stats_to_csv( + writer, usage_per_entry, usage_per_occurrence, num_entries +): + """Write usage statistics to csv file. 
+ + Args: + writer: an instance of csv.writer + usage_per_entry: path usage statistics per data entry + usage_per_occurrence: path usage statistics per occurrence + num_entries: number of data entries + """ + # Write header + writer.writerow( + [ + "IDS", + "Path in IDS", + "Uses errorbar", + "Frequency (without occurrences)", + "Frequency (with occurrences)", + ] + ) + + for ids_name in sorted(usage_per_entry): + entry_usage = usage_per_entry[ids_name] + occurrence_usage = usage_per_occurrence[ids_name] + + # Usage statistics of the IDS (# entries with this IDS / # entries) + freq = entry_usage.num_occurrences / num_entries + writer.writerow([ids_name, "", "", freq, ""]) + + for path, entry_count in sorted(entry_usage.path_counter.items()): + if "_error_" in path: + continue # Skip error nodes + occurrence_count = occurrence_usage.path_counter[path] + + uses_error = f"{path}_error_upper" in entry_usage.path_counter + # Frequency without occurrences, see GH#84 for details + freq1 = entry_count / entry_usage.num_occurrences + # Frequency with occurrences + freq2 = occurrence_count / occurrence_usage.num_occurrences + + # Write data row + writer.writerow([ids_name, path, "X" if uses_error else "", freq1, freq2]) + + +_csv_help_text = ( + "Write analysis output to the provided CSV file. For details, " + "see https://github.com/iterorganization/IMAS-Python/issues/84." +) + + @click.command("process-db-analysis") @click.argument( "infiles", metavar="INPUT_FILES...", nargs=-1, type=infile_path, required=True ) @click.option("--show-empty-ids", is_flag=True, help="Show empty IDSs in the overview.") -def process_db_analysis(infiles, show_empty_ids): +@click.option("--csv", type=outfile_path, help=_csv_help_text) +def process_db_analysis(infiles, show_empty_ids, csv): """Process supplied Data Entry analyses, and display statistics. 
\b @@ -153,9 +213,10 @@ def process_db_analysis(infiles, show_empty_ids): """ setup_rich_log_handler(False) - factory = imas.IDSFactory() - filled_per_ids = {ids_name: set() for ids_name in factory.ids_names()} - logger.info("Using Data Dictionary version %s.", factory.dd_version) + usage_per_entry = defaultdict(_PathUsage) + usage_per_occurrence = defaultdict(_PathUsage) + num_entries = 0 + logger.info("Reading %d input files...", len(infiles)) # Read input data and collate usage info per IDS @@ -164,17 +225,51 @@ def process_db_analysis(infiles, show_empty_ids): data = json.load(file) for entry in data: + usage_for_this_entry = defaultdict(_PathUsage) for ids_info in entry["ids_info"]: - fill_info = filled_per_ids[ids_info["name"]] - fill_info.update(ids_info["filled_data"]) + ids_name = ids_info["name"] + filled_paths = ids_info["filled_data"] + # Update counters for this entry + usage_for_this_entry[ids_name].path_counter.update(filled_paths) + # Update counters for all occurrences + usage_per_occurrence[ids_name].num_occurrences += 1 + usage_per_occurrence[ids_name].path_counter.update(filled_paths) + # Update data entry usage + for ids_name, usage in usage_for_this_entry.items(): + usage_per_entry[ids_name].num_occurrences += 1 + usage_per_entry[ids_name].path_counter.update(usage.path_counter.keys()) + num_entries += 1 logger.info("Done reading input files.") + + if csv is not None: + # Output to CSV file + logger.info("Writing output to CSV file: %s", csv) + with open(csv, "w") as csvfile: + writer = csvwriter(csvfile) + _write_usage_stats_to_csv( + writer, usage_per_entry, usage_per_occurrence, num_entries + ) + logger.info("Done.") + return + logger.info("Analyzing filled data...") + factory = imas.IDSFactory() + logger.info("Using Data Dictionary version %s.", factory.dd_version) # Construct AnalysisNodes per IDS analysis_nodes: Dict[str, _AnalysisNode] = {} - for ids_name, filled in filled_per_ids.items(): + for ids_name, usage in usage_per_occurrence.items(): + if ids_name not in factory.ids_names(): + logger.warning( + "Found IDS %s in data files, but this IDS is not available " + "in DD version %s. Statistics will not be tracked.", + ids_name, + factory.dd_version, + ) + continue metadata = factory.new(ids_name).metadata + filled = set(usage.path_counter.keys()) ids_analysis_node = _AnalysisNode("") def walk_metadata_tree(metadata: IDSMetadata, node: _AnalysisNode): diff --git a/imas/command/helpers.py b/imas/command/helpers.py index 8c664306..cded9ef1 100644 --- a/imas/command/helpers.py +++ b/imas/command/helpers.py @@ -36,7 +36,7 @@ def min_version_guard(al_version: Version): al_version: Minimum imas_core version required for this command. """ used_version = ll_interface._al_version - if used_version >= al_version: + if used_version and used_version >= al_version: return click.echo( f"This command requires at least version {al_version} of the Access Layer." ) diff --git a/imas/db_entry.py b/imas/db_entry.py index d7d74574..471a50ad 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -1,11 +1,13 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Logic for interacting with IMAS Data Entries. 
-""" +"""Logic for interacting with IMAS Data Entries.""" + +from __future__ import annotations import logging import os -from typing import Any, List, Optional, Tuple, Type, Union, overload +import pathlib +from typing import Any, Type, overload import numpy @@ -34,14 +36,14 @@ logger = logging.getLogger(__name__) -def _get_uri_mode(uri, mode) -> Tuple[str, str]: +def _get_uri_mode(uri, mode) -> tuple[str, str]: """Helper method to parse arguments of DBEntry.__init__.""" return uri, mode def _get_legacy_params( backend_id, db_name, pulse, run, user_name=None, data_version=None -) -> Tuple[int, str, int, int, Optional[str], Optional[str]]: +) -> tuple[int, str, int, int, str | None, str | None]: """Helper method to parse arguments of DBEntry.__init__.""" return backend_id, db_name, pulse, run, user_name, data_version @@ -75,8 +77,8 @@ def __init__( uri: str, mode: str, *, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + dd_version: str | None = None, + xml_path: str | pathlib.Path | None = None, ) -> None: ... @overload @@ -86,19 +88,19 @@ def __init__( db_name: str, pulse: int, run: int, - user_name: Optional[str] = None, - data_version: Optional[str] = None, + user_name: str | None = None, + data_version: str | None = None, *, - shot: Optional[int] = None, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + shot: int | None = None, + dd_version: str | None = None, + xml_path: str | pathlib.Path | None = None, ) -> None: ... def __init__( self, *args, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + dd_version: str | None = None, + xml_path: str | pathlib.Path | None = None, **kwargs, ): """Open or create a Data Entry based on the provided URI and mode, or prepare a @@ -163,7 +165,7 @@ def __init__( ) from None # Actual intializiation - self._dbe_impl: Optional[DBEntryImpl] = None + self._dbe_impl: DBEntryImpl | None = None self._dd_version = dd_version self._xml_path = xml_path self._ids_factory = IDSFactory(dd_version, xml_path) @@ -187,7 +189,7 @@ def __init__( self._dbe_impl = cls.from_uri(self.uri, mode, self._ids_factory) @staticmethod - def _select_implementation(uri: Optional[str]) -> Type[DBEntryImpl]: + def _select_implementation(uri: str | None) -> Type[DBEntryImpl]: """Select which DBEntry implementation to use based on the URI.""" if uri and uri.endswith(".nc") and not uri.startswith("imas:"): from imas.backends.netcdf.db_entry_nc import NCDBEntryImpl as impl @@ -308,7 +310,7 @@ def get( lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read the contents of an IDS into memory. @@ -371,7 +373,7 @@ def get_slice( lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read a single time slice from an IDS in this Database Entry. 
@@ -435,14 +437,14 @@ def get_sample( ids_name: str, tmin: float, tmax: float, - dtime: Optional[Union[float, numpy.ndarray]] = None, - interpolation_method: Optional[int] = None, + dtime: float | numpy.ndarray | None = None, + interpolation_method: int | None = None, occurrence: int = 0, *, lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read a range of time slices from an IDS in this Database Entry. @@ -548,8 +550,8 @@ def _get( self, ids_name: str, occurrence: int, - parameters: Union[None, GetSliceParameters, GetSampleParameters], - destination: Optional[IDSToplevel], + parameters: None | GetSliceParameters | GetSampleParameters, + destination: IDSToplevel | None, lazy: bool, autoconvert: bool, ignore_unknown_dd_version: bool, @@ -603,15 +605,13 @@ def _get( nbc_map = None if dd_version and dd_version != destination._dd_version: if dd_version.split(".")[0] != destination._dd_version.split(".")[0]: - logger.warning( - "On-disk data is stored in DD %s which has a different major " - "version than the requested DD version (%s). IMAS-Python will " - "convert the data automatically, but this does not cover all" - "changes. " - "See %s/multi-dd.html#conversion-of-idss-between-dd-versions", - dd_version, - destination._dd_version, - imas.PUBLISHED_DOCUMENTATION_ROOT, + raise RuntimeError( + f"On-disk data is stored in DD {dd_version} which has a different " + "major version than the requested DD version " + f"({destination._dd_version}). IMAS-Python will not automatically " + "convert this data for you. See the documentation for more " + f"details and fixes: {imas.PUBLISHED_DOCUMENTATION_ROOT}" + "/multi-dd.html#loading-idss-from-a-different-major-version" ) ddmap, source_is_older = dd_version_map_from_factories( ids_name, IDSFactory(version=dd_version), self._ids_factory @@ -734,7 +734,7 @@ def _put(self, ids: IDSToplevel, occurrence: int, is_slice: bool): version_put = ids.ids_properties.version_put version_put.data_dictionary = self._ids_factory._version version_put.access_layer = self._dbe_impl.access_layer_version() - version_put.access_layer_language = f"imas {imas.__version__}" + version_put.access_layer_language = f"IMAS-Python {imas.__version__}" self._dbe_impl.put(ids, occurrence, is_slice) @@ -752,12 +752,12 @@ def delete_data(self, ids_name: str, occurrence: int = 0) -> None: @overload def list_all_occurrences( self, ids_name: str, node_path: None = None - ) -> List[int]: ... + ) -> list[int]: ... @overload def list_all_occurrences( self, ids_name: str, node_path: str - ) -> Tuple[List[int], List[IDSBase]]: ... + ) -> tuple[list[int], list[IDSBase]]: ... 
def list_all_occurrences(self, ids_name, node_path=None): """List all non-empty occurrences of an IDS diff --git a/imas/ids_convert.py b/imas/ids_convert.py index c4e752e0..a1631b83 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -7,7 +7,7 @@ import logging from functools import lru_cache, partial from pathlib import Path -from typing import Callable, Dict, Iterator, Optional, Set, Tuple +from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple from xml.etree.ElementTree import Element, ElementTree import numpy @@ -15,7 +15,7 @@ from scipy.interpolate import interp1d import imas -from imas.dd_zip import parse_dd_version +from imas.dd_zip import parse_dd_version, dd_etree from imas.ids_base import IDSBase from imas.ids_data_type import IDSDataType from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS @@ -87,6 +87,15 @@ def __init__(self) -> None: converted. """ + self.post_process_ids: List[ + Callable[[IDSToplevel, IDSToplevel, bool], None] + ] = [] + """Postprocess functions to be applied to the whole IDS. + + These postprocess functions should be applied to the whole IDS after all data is + converted. The arguments supplied are: source IDS, target IDS, deepcopy boolean. + """ + self.ignore_missing_paths: Set[str] = set() """Set of paths that should not be logged when data is present.""" @@ -201,6 +210,10 @@ def _build_map(self, old: Element, new: Element) -> None: old_path_set = set(old_paths) new_path_set = set(new_paths) + # expose the path->Element maps as members so other methods can reuse them + self.old_paths = old_paths + self.new_paths = new_paths + def process_parent_renames(path: str) -> str: # Apply any parent AoS/structure rename # Loop in reverse order to find the closest parent which was renamed: @@ -222,20 +235,6 @@ def get_old_path(path: str, previous_name: str) -> str: old_path = previous_name return process_parent_renames(old_path) - def add_rename(old_path: str, new_path: str): - old_item = old_paths[old_path] - new_item = new_paths[new_path] - self.new_to_old[new_path] = ( - old_path, - _get_tbp(old_item, old_paths), - _get_ctxpath(old_path, old_paths), - ) - self.old_to_new[old_path] = ( - new_path, - _get_tbp(new_item, new_paths), - _get_ctxpath(new_path, new_paths), - ) - # Iterate through all NBC metadata and add entries for new_item in new.iterfind(".//field[@change_nbc_description]"): new_path = new_item.get("path") @@ -275,14 +274,16 @@ def add_rename(old_path: str, new_path: str): self.version_old, ) elif self._check_data_type(old_item, new_item): - add_rename(old_path, new_path) + # use class helper to register simple renames and + # reciprocal mappings + self._add_rename(old_path, new_path) if old_item.get("data_type") in DDVersionMap.STRUCTURE_TYPES: # Add entries for common sub-elements for path in old_paths: if path.startswith(old_path): npath = path.replace(old_path, new_path, 1) if npath in new_path_set: - add_rename(path, npath) + self._add_rename(path, npath) elif nbc_description == "type_changed": pass # We will handle this (if possible) in self._check_data_type elif nbc_description == "repeat_children_first_point": @@ -332,27 +333,133 @@ def add_rename(old_path: str, new_path: str): new_version = parse_dd_version(new_version_node.text) # Additional conversion rules for DDv3 to DDv4 if self.version_old.major == 3 and new_version and new_version.major == 4: - # Postprocessing for COCOS definition change: - for psi_like in ["psi_like", "dodpsi_like"]: - xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" - 
for old_item in old.iterfind(xpath_query): - old_path = old_item.get("path") - new_path = self.old_to_new.path.get(old_path, old_path) - self.new_to_old.post_process[new_path] = _cocos_change - self.old_to_new.post_process[old_path] = _cocos_change - # Definition change for pf_active circuit/connections - if self.ids_name == "pf_active": - path = "circuit/connections" - self.new_to_old.post_process[path] = _circuit_connections_4to3 - self.old_to_new.post_process[path] = _circuit_connections_3to4 - # Migrate ids_properties/source to ids_properties/provenance - # Only implement forward conversion (DD3 -> 4): - # - Pretend that this is a rename from ids_properties/source -> provenance - # - And register type_change handler which will be called with the source - # element and the new provenance structure - path = "ids_properties/source" - self.old_to_new.path[path] = "ids_properties/provenance" - self.old_to_new.type_change[path] = _ids_properties_source + self._apply_3to4_conversion(old, new) + + def _add_rename(self, old_path: str, new_path: str) -> None: + """Register a simple rename from old_path -> new_path using the + path->Element maps stored on the instance (self.old_paths/self.new_paths). + This will also add the reciprocal mapping when possible. + """ + old_item = self.old_paths[old_path] + new_item = self.new_paths[new_path] + + # forward mapping + self.old_to_new[old_path] = ( + new_path, + _get_tbp(new_item, self.new_paths), + _get_ctxpath(new_path, self.new_paths), + ) + + # reciprocal mapping + self.new_to_old[new_path] = ( + old_path, + _get_tbp(old_item, self.old_paths), + _get_ctxpath(old_path, self.old_paths), + ) + + def _apply_3to4_conversion(self, old: Element, new: Element) -> None: + # Postprocessing for COCOS definition change: + cocos_paths = [] + for psi_like in ["psi_like", "dodpsi_like"]: + xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" + for old_item in old.iterfind(xpath_query): + cocos_paths.append(old_item.get("path")) + # Sign flips not covered by the generic rule: + cocos_paths.extend(_3to4_sign_flip_paths.get(self.ids_name, [])) + for old_path in cocos_paths: + new_path = self.old_to_new.path.get(old_path, old_path) + self.new_to_old.post_process[new_path] = _cocos_change + self.old_to_new.post_process[old_path] = _cocos_change + + # Convert equilibrium boundary_separatrix and populate contour_tree + if self.ids_name == "equilibrium": + self.old_to_new.post_process_ids.append(_equilibrium_boundary_3to4) + self.old_to_new.ignore_missing_paths |= { + "time_slice/boundary_separatrix", + "time_slice/boundary_secondary_separatrix", + } + # Definition change for pf_active circuit/connections + if self.ids_name == "pf_active": + path = "circuit/connections" + self.new_to_old.post_process[path] = _circuit_connections_4to3 + self.old_to_new.post_process[path] = _circuit_connections_3to4 + + # Migrate ids_properties/source to ids_properties/provenance + # Only implement forward conversion (DD3 -> 4): + # - Pretend that this is a rename from ids_properties/source -> provenance + # - And register type_change handler which will be called with the source + # element and the new provenance structure + path = "ids_properties/source" + self.old_to_new.path[path] = "ids_properties/provenance" + self.old_to_new.type_change[path] = _ids_properties_source + + # GH#55: add logic to migrate some obsolete nodes in DD3.42.0 -> 4.0 + # These nodes (e.g. equilibrium profiles_1d/j_tor) have an NBC rename rule + # (to e.g. 
equilibrium profiles_1d/j_phi) applying to DD 3.41 and older.
+        # In DD 3.42, both the old AND new node names are present.
+        if self.version_old.minor >= 42:  # Only apply for DD 3.42+ -> DD 4
+            # Get a rename map for 3.41 -> new version
+            factory341 = imas.IDSFactory("3.41.0")
+            if self.ids_name in factory341.ids_names():  # Ensure the IDS exists in 3.41
+                dd341_map = _DDVersionMap(
+                    self.ids_name,
+                    dd_etree("3.41.0"),
+                    self.new_version,
+                    Version("3.41.0"),
+                )
+                to_update = {}
+                for path, newpath in self.old_to_new.path.items():
+                    # Find all nodes that have disappeared in DD 4.x, and apply the
+                    # rename rule from DD3.41 -> DD 4.x
+                    if newpath is None and path in dd341_map.old_to_new:
+                        self.old_to_new.path[path] = dd341_map.old_to_new.path[path]
+                        # Note: path could be a structure or AoS, so we also put all
+                        # child paths in our map:
+                        path = path + "/"  # All child nodes will start with this
+                        for p, v in dd341_map.old_to_new.path.items():
+                            if p.startswith(path):
+                                to_update[p] = v
+                self.old_to_new.path.update(to_update)
+
+        # GH#59: To further improve the DD3 to DD4 conversion, especially for the
+        # Machine Description part of the IDSs, we add a 3to4-specific rule that
+        # converts any sibling pair name + identifier (not part of an identifier
+        # structure, meaning that there is no index sibling) into description + name:
+        #   parent/name (DD3)       -> parent/description (DD4)
+        #   parent/identifier (DD3) -> parent/name (DD4)
+        # Only perform the mapping if the corresponding target fields exist in the
+        # new DD and if we don't already have a mapping for the involved paths.
+        # Use self.old_paths and self.new_paths set in _build_map.
+        for p in self.old_paths:
+            # Look for 'name' children
+            if not p.endswith("/name"):
+                continue
+            parent = p.rsplit("/", 1)[0]
+            name_path = f"{parent}/name"
+            id_path = f"{parent}/identifier"
+            index_path = f"{parent}/index"
+            desc_path = f"{parent}/description"
+            new_name_path = name_path
+
+            # Skip this parent unless both 'name' and 'identifier' exist in the old DD
+            if name_path not in self.old_paths or id_path not in self.old_paths:
+                continue
+            # Exclude identifier structures (these have an index sibling)
+            if index_path in self.old_paths:
+                continue
+
+            # Ensure the candidate target fields exist in the new DD
+            if desc_path not in self.new_paths or new_name_path not in self.new_paths:
+                continue
+
+            # Map DD3 name -> DD4 description
+            if name_path not in self.old_to_new.path:
+                self._add_rename(name_path, desc_path)
+
+            # Map DD3 identifier -> DD4 name. Note: id_path is already present in the
+            # map (with value None) when 'identifier' disappeared in DD4; the rename
+            # below overrides that entry.
+            if id_path in self.old_to_new.path:
+                self._add_rename(id_path, new_name_path)
 
     def _map_missing(self, is_new: bool, missing_paths: Set[str]):
         rename_map = self.new_to_old if is_new else self.old_to_new
@@ -512,6 +619,10 @@ def convert_ids(
     else:
         _copy_structure(toplevel, target, deepcopy, rename_map)
 
+    # Global post-processing functions
+    for callback in rename_map.post_process_ids:
+        callback(toplevel, target, deepcopy)
+
     logger.info("Conversion of IDS %s finished.", ids_name)
     if provenance_origin_uri:
         _add_provenance_entry(target, toplevel._version, provenance_origin_uri)
@@ -644,6 +755,113 @@ def _copy_structure(
             callback(item, target_item)
 
 
+_3to4_sign_flip_paths = {
+    "core_instant_changes": [
+        "change/profiles_1d/grid/psi_magnetic_axis",
+        "change/profiles_1d/grid/psi_boundary",
+    ],
+    "core_profiles": [
+        "profiles_1d/grid/psi_magnetic_axis",
+        "profiles_1d/grid/psi_boundary",
+    ],
+    "core_sources": [
+        "source/profiles_1d/grid/psi_magnetic_axis",
+        
"source/profiles_1d/grid/psi_boundary", + ], + "core_transport": [ + "model/profiles_1d/grid_d/psi_magnetic_axis", + "model/profiles_1d/grid_d/psi_boundary", + "model/profiles_1d/grid_v/psi_magnetic_axis", + "model/profiles_1d/grid_v/psi_boundary", + "model/profiles_1d/grid_flux/psi_magnetic_axis", + "model/profiles_1d/grid_flux/psi_boundary", + ], + "disruption": [ + "global_quantities/psi_halo_boundary", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "ece": [ + "channel/beam_tracing/beam/position/psi", + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "edge_profiles": [ + "profiles_1d/grid/psi", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "equilibrium": [ + "time_slice/boundary/psi", + "time_slice/global_quantities/q_min/psi", + "time_slice/ggd/psi/values", + ], + "mhd": ["ggd/psi/values"], + "pellets": ["time_slice/pellet/path_profiles/psi"], + "plasma_profiles": [ + "profiles_1d/grid/psi", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + "ggd/psi/values", + ], + "plasma_sources": [ + "source/profiles_1d/grid/psi", + "source/profiles_1d/grid/psi_magnetic_axis", + "source/profiles_1d/grid/psi_boundary", + ], + "plasma_transport": [ + "model/profiles_1d/grid_d/psi", + "model/profiles_1d/grid_d/psi_magnetic_axis", + "model/profiles_1d/grid_d/psi_boundary", + "model/profiles_1d/grid_v/psi", + "model/profiles_1d/grid_v/psi_magnetic_axis", + "model/profiles_1d/grid_v/psi_boundary", + "model/profiles_1d/grid_flux/psi", + "model/profiles_1d/grid_flux/psi_magnetic_axis", + "model/profiles_1d/grid_flux/psi_boundary", + ], + "radiation": [ + "process/profiles_1d/grid/psi_magnetic_axis", + "process/profiles_1d/grid/psi_boundary", + ], + "reflectometer_profile": [ + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "reflectometer_fluctuation": [ + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "runaway_electrons": [ + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "sawteeth": [ + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "summary": [ + "global_quantities/psi_external_average/value", + "local/magnetic_axis/position/psi", + ], + "transport_solver_numerics": [ + "solver_1d/grid/psi_magnetic_axis", + "solver_1d/grid/psi_boundary", + "derivatives_1d/grid/psi_magnetic_axis", + "derivatives_1d/grid/psi_boundary", + ], + "wall": ["description_ggd/ggd/psi/values"], + "waves": [ + "coherent_wave/profiles_1d/grid/psi_magnetic_axis", + "coherent_wave/profiles_1d/grid/psi_boundary", + "coherent_wave/profiles_2d/grid/psi", + "coherent_wave/beam_tracing/beam/position/psi", + ], +} +"""List of paths per IDS that require a COCOS sign change, but aren't covered by the +generic rule.""" + + ######################################################################################## # Type changed handlers and post-processing functions # ######################################################################################## @@ -1031,3 +1249,70 @@ def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSB assume_sorted=True, )(timebase) target_item.value = value.astype(numpy.int32) if is_integer else value + + +def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: bool): + """Convert DD3 boundary[[_secondary]_separatrix] to DD4 contour_tree""" + # Implement 
https://github.com/iterorganization/IMAS-Python/issues/60 + copy = numpy.copy if deepcopy else lambda x: x + for ts3, ts4 in zip(eq3.time_slice, eq4.time_slice): + if not ts3.global_quantities.psi_axis.has_value: + # No magnetic axis, assume no boundary either: + continue + n_nodes = 1 # magnetic axis + if ts3.boundary_separatrix.psi.has_value: + n_nodes = 2 + if ( # boundary_secondary_separatrix is introduced in DD 3.32.0 + hasattr(ts3, "boundary_secondary_separatrix") + and ts3.boundary_secondary_separatrix.psi.has_value + ): + n_nodes = 3 + node = ts4.contour_tree.node + node.resize(n_nodes) + # Magnetic axis (primary O-point) + gq = ts3.global_quantities + # Note the sign flip for psi due to the COCOS change between DD3 and DD4! + axis_is_psi_minimum = -gq.psi_axis < -gq.psi_boundary + + node[0].critical_type = 0 if axis_is_psi_minimum else 2 + node[0].r = gq.magnetic_axis.r + node[0].z = gq.magnetic_axis.z + node[0].psi = -gq.psi_axis # COCOS change + + # X-points + if n_nodes >= 2: + if ts3.boundary_separatrix.type == 0: # limiter plasma + node[1].critical_type = 2 if axis_is_psi_minimum else 0 + node[1].r = ts3.boundary_separatrix.active_limiter_point.r + node[1].z = ts3.boundary_separatrix.active_limiter_point.z + else: + node[1].critical_type = 1 # saddle-point (x-point) + if len(ts3.boundary_separatrix.x_point): + node[1].r = ts3.boundary_separatrix.x_point[0].r + node[1].z = ts3.boundary_separatrix.x_point[0].z + # Additional x-points. N.B. levelset is only stored on the first node + for i in range(1, len(ts3.boundary_separatrix.x_point)): + node.resize(len(node) + 1, keep=True) + node[-1].critical_type = 1 + node[-1].r = ts3.boundary_separatrix.x_point[i].r + node[-1].z = ts3.boundary_separatrix.x_point[i].z + node[-1].psi = -ts3.boundary_separatrix.psi + node[1].psi = -ts3.boundary_separatrix.psi # COCOS change + node[1].levelset.r = copy(ts3.boundary_separatrix.outline.r) + node[1].levelset.z = copy(ts3.boundary_separatrix.outline.z) + + if n_nodes >= 3: + node[2].critical_type = 1 # saddle-point (x-point) + if len(ts3.boundary_secondary_separatrix.x_point): + node[2].r = ts3.boundary_secondary_separatrix.x_point[0].r + node[2].z = ts3.boundary_secondary_separatrix.x_point[0].z + # Additional x-points. N.B. levelset is only stored on the first node + for i in range(1, len(ts3.boundary_secondary_separatrix.x_point)): + node.resize(len(node) + 1, keep=True) + node[-1].critical_type = 1 + node[-1].r = ts3.boundary_secondary_separatrix.x_point[i].r + node[-1].z = ts3.boundary_secondary_separatrix.x_point[i].z + node[-1].psi = -ts3.boundary_secondary_separatrix.psi + node[2].psi = -ts3.boundary_secondary_separatrix.psi # COCOS change + node[2].levelset.r = copy(ts3.boundary_secondary_separatrix.outline.r) + node[2].levelset.z = copy(ts3.boundary_secondary_separatrix.outline.z) diff --git a/imas/ids_coordinates.py b/imas/ids_coordinates.py index 29e62a87..f8b4f59d 100644 --- a/imas/ids_coordinates.py +++ b/imas/ids_coordinates.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Logic for interpreting coordinates in an IDS. 
-""" +"""Logic for interpreting coordinates in an IDS.""" import logging from contextlib import contextmanager @@ -235,7 +234,9 @@ def __getitem__(self, key: int) -> Union["IDSPrimitive", np.ndarray]: f"matching sizes:\n{sizes}" ) if len(nonzero_alternatives) > 1: - logger.info("Multiple alternative coordinates are set, using the first") + logger.debug( + "Multiple alternative coordinates are set, using the first" + ) return nonzero_alternatives[0] # Handle alternative coordinates, currently (DD 3.38.1) the `coordinate in diff --git a/imas/ids_factory.py b/imas/ids_factory.py index cd88952d..b840d8a8 100644 --- a/imas/ids_factory.py +++ b/imas/ids_factory.py @@ -1,11 +1,14 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Tools for generating IDSs from a Data Dictionary version. -""" +"""Tools for generating IDSs from a Data Dictionary version.""" + +from __future__ import annotations import logging +import pathlib +from collections.abc import Iterable, Iterator from functools import partial -from typing import Any, Iterable, Iterator, List, Optional +from typing import Any from imas import dd_zip from imas.exception import IDSNameError @@ -27,7 +30,7 @@ class IDSFactory: """ def __init__( - self, version: Optional[str] = None, xml_path: Optional[str] = None + self, version: str | None = None, xml_path: str | pathlib.Path | None = None ) -> None: """Create a new IDS Factory @@ -77,7 +80,7 @@ def __iter__(self) -> Iterator[str]: """Iterate over the IDS names defined by the loaded Data Dictionary""" return iter(self._ids_elements) - def ids_names(self) -> List[str]: + def ids_names(self) -> list[str]: """Get a list of all known IDS names in the loaded Data Dictionary""" return list(self._ids_elements) diff --git a/imas/ids_identifiers.py b/imas/ids_identifiers.py index a64dd87f..1525a070 100644 --- a/imas/ids_identifiers.py +++ b/imas/ids_identifiers.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""IMAS-Python module to support Data Dictionary identifiers. 
-""" +"""IMAS-Python module to support Data Dictionary identifiers.""" import logging from enum import Enum @@ -16,16 +15,18 @@ class IDSIdentifier(Enum): """Base class for all identifier enums.""" - def __new__(self, value: int, description: str): - obj = object.__new__(self) + def __new__(cls, value: int, description: str, aliases: list = []): + obj = object.__new__(cls) obj._value_ = value return obj - def __init__(self, value: int, description: str) -> None: + def __init__(self, value: int, description: str, aliases: list = []) -> None: self.index = value """Unique index for this identifier value.""" self.description = description """Description for this identifier value.""" + self.aliases = aliases + """Alternative names for this identifier value.""" def __eq__(self, other): if self is other: @@ -37,35 +38,49 @@ def __eq__(self, other): except (AttributeError, TypeError, ValueError): # Attribute doesn't exist, or failed to convert return NotImplemented + # Index must match if other_index == self.index: - # Name may be left empty - if other_name == self.name or other_name == "": + # Name may be left empty, or match name or alias + if ( + other_name == self.name + or other_name == "" + or other_name in self.aliases + ): # Description doesn't have to match, though we will warn when it doesn't - if other_description != self.description and other_description != "": + if other_description not in (self.description, ""): logger.warning( "Description of %r does not match identifier description %r", other.description, self.description, ) return True - else: - logger.warning( - "Name %r does not match identifier name %r, but indexes are equal.", - other.name, - self.name, - ) + + # If we get here with matching indexes but no name/alias match, warn + logger.warning( + "Name %r does not match identifier name %r, but indexes are equal.", + other.name, + self.name, + ) return False @classmethod def _from_xml(cls, identifier_name, xml) -> Type["IDSIdentifier"]: element = fromstring(xml) enum_values = {} + aliases = {} for int_element in element.iterfind("int"): name = int_element.get("name") value = int_element.text description = int_element.get("description") - enum_values[name] = (int(value), description) + # alias attribute may contain multiple comma-separated aliases + alias_attr = int_element.get("alias", "") + aliases = [a.strip() for a in alias_attr.split(",") if a.strip()] + # Canonical entry: use the canonical 'name' as key + enum_values[name] = (int(value), description, aliases) + # Also add alias names as enum *aliases* (they become enum attributes) + for alias in aliases: + enum_values[alias] = (int(value), description, aliases) # Create the enumeration enum = cls( identifier_name, diff --git a/imas/ids_structure.py b/imas/ids_structure.py index 27270034..fbc3042e 100644 --- a/imas/ids_structure.py +++ b/imas/ids_structure.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. 
-"""A structure in an IDS -""" +"""A structure in an IDS""" import logging from copy import deepcopy @@ -151,7 +150,9 @@ def __deepcopy__(self, memo): for child in self._children: if child in self.__dict__: child_copy = deepcopy(getattr(self, child), memo) - setattr(copy, child, child_copy) + # bypass __setattr__: + copy.__dict__[child] = child_copy + child_copy._parent = copy return copy def __dir__(self) -> List[str]: diff --git a/imas/ids_toplevel.py b/imas/ids_toplevel.py index 15ae0970..947bf72f 100644 --- a/imas/ids_toplevel.py +++ b/imas/ids_toplevel.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc) -""" +"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc)""" import logging import os @@ -12,11 +11,10 @@ import numpy import imas -from imas.backends.imas_core.imas_interface import ll_interface, lowlevel +from imas.backends.imas_core.imas_interface import lowlevel from imas.exception import ValidationError from imas.ids_base import IDSDoc from imas.ids_defs import ( - ASCII_BACKEND, ASCII_SERIALIZER_PROTOCOL, CHAR_DATA, DEFAULT_SERIALIZER_PROTOCOL, @@ -47,19 +45,12 @@ def _serializer_tmpdir() -> str: def _create_serialization_dbentry(filepath: str, dd_version: str) -> "DBEntry": """Create a temporary DBEntry for use in the ASCII serialization protocol.""" - if ll_interface._al_version.major == 4: # AL4 compatibility - dbentry = imas.DBEntry( - ASCII_BACKEND, "serialize", 1, 1, "serialize", dd_version=dd_version - ) - dbentry.create(options=f"-fullpath {filepath}") - return dbentry - else: # AL5 - path = Path(filepath) - return imas.DBEntry( - f"imas:ascii?path={path.parent};filename={path.name}", - "w", - dd_version=dd_version, - ) + path = Path(filepath) + return imas.DBEntry( + f"imas:ascii?path={path.parent};filename={path.name}", + "w", + dd_version=dd_version, + ) class IDSToplevel(IDSStructure): diff --git a/imas/test/test_cli.py b/imas/test/test_cli.py index 6ff09c23..0f4b305e 100644 --- a/imas/test/test_cli.py +++ b/imas/test/test_cli.py @@ -2,10 +2,7 @@ import pytest from click.testing import CliRunner -from packaging.version import Version -from imas.backends.imas_core.imas_interface import has_imas -from imas.backends.imas_core.imas_interface import ll_interface from imas.command.cli import print_version from imas.command.db_analysis import analyze_db, process_db_analysis from imas.db_entry import DBEntry @@ -20,13 +17,7 @@ def test_imas_version(): @pytest.mark.cli -@pytest.mark.skipif( - not has_imas or ll_interface._al_version < Version("5.0"), - reason="Needs AL >= 5 AND Requires IMAS Core.", -) -def test_db_analysis( - tmp_path, -): +def test_db_analysis(tmp_path, requires_imas): # This only tests the happy flow, error handling is not tested db_path = tmp_path / "test_db_analysis" with DBEntry(f"imas:hdf5?path={db_path}", "w") as entry: @@ -48,3 +39,64 @@ def test_db_analysis( ) assert process_result.exit_code == 0, process_result.output assert "core_profiles" in process_result.output + + +@pytest.mark.cli +def test_db_analysis_csv(tmp_path, requires_imas): + with DBEntry(f"imas:hdf5?path={tmp_path}/entry1", "w") as entry: + eq = entry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 2 + entry.put(eq) + eq.ids_properties.comment = "filled" + entry.put(eq, 1) + eq.ids_properties.homogeneous_time = 1 + eq.time = [1.0] + eq.time_slice.resize(1) + eq.time_slice[0].boundary.psi = 1.0 
+        eq.time_slice[0].boundary.psi_error_upper = 0.1
+        entry.put(eq, 2)
+        wall = entry.factory.wall()
+        wall.ids_properties.homogeneous_time = 2
+        entry.put(wall)
+        wall.first_wall_surface_area = 1.0
+        entry.put(wall, 1)
+    with DBEntry(f"imas:hdf5?path={tmp_path}/entry2", "w") as entry:
+        eq = entry.factory.equilibrium()
+        eq.ids_properties.homogeneous_time = 2
+        eq.ids_properties.comment = "also filled"
+        entry.put(eq)
+
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path) as td:
+        analyze_result = runner.invoke(
+            analyze_db, [f"{tmp_path}/entry1", f"{tmp_path}/entry2"]
+        )
+        assert analyze_result.exit_code == 0
+
+        outfile = Path(td) / "imas-db-analysis.json.gz"
+        assert outfile.exists()
+        process_result = runner.invoke(
+            process_db_analysis, [str(outfile), "--csv", "output.csv"]
+        )
+        assert process_result.exit_code == 0
+
+        assert (
+            Path("output.csv").read_text()
+            == """\
+IDS,Path in IDS,Uses errorbar,Frequency (without occurrences),Frequency (with occurrences)
+equilibrium,,,1.0,
+equilibrium,ids_properties/comment,,1.0,0.75
+equilibrium,ids_properties/homogeneous_time,,1.0,1.0
+equilibrium,ids_properties/version_put/access_layer,,1.0,1.0
+equilibrium,ids_properties/version_put/access_layer_language,,1.0,1.0
+equilibrium,ids_properties/version_put/data_dictionary,,1.0,1.0
+equilibrium,time,,0.5,0.25
+equilibrium,time_slice/boundary/psi,X,0.5,0.25
+wall,,,0.5,
+wall,first_wall_surface_area,,1.0,0.5
+wall,ids_properties/homogeneous_time,,1.0,1.0
+wall,ids_properties/version_put/access_layer,,1.0,1.0
+wall,ids_properties/version_put/access_layer_language,,1.0,1.0
+wall,ids_properties/version_put/data_dictionary,,1.0,1.0
+"""  # noqa: E501 (line too long)
+        )
diff --git a/imas/test/test_dbentry.py b/imas/test/test_dbentry.py
index a1380101..e13d82a4 100644
--- a/imas/test/test_dbentry.py
+++ b/imas/test/test_dbentry.py
@@ -2,7 +2,6 @@
 
 import imas
 import imas.ids_defs
-from imas.backends.imas_core.imas_interface import has_imas, ll_interface
 from imas.exception import UnknownDDVersion
 from imas.test.test_helpers import compare_children, open_dbentry
@@ -23,11 +22,7 @@ def test_dbentry_contextmanager(requires_imas):
     assert entry2._dbe_impl is None
 
 
-@pytest.mark.skipif(
-    not has_imas or ll_interface._al_version.major < 5,
-    reason="URI API not available",
-)
-def test_dbentry_contextmanager_uri(tmp_path):
+def test_dbentry_contextmanager_uri(tmp_path, requires_imas):
     entry = imas.DBEntry(f"imas:ascii?path={tmp_path}/testdb", "w")
     ids = entry.factory.core_profiles()
     ids.ids_properties.homogeneous_time = 0
diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py
index f398d03f..cb6d107d 100644
--- a/imas/test/test_helpers.py
+++ b/imas/test/test_helpers.py
@@ -19,7 +19,7 @@
 from imas.ids_struct_array import IDSStructArray
 from imas.ids_structure import IDSStructure
 from imas.ids_toplevel import IDSToplevel
-from imas.util import idsdiffgen, visit_children
+from imas.util import idsdiffgen, tree_iter
 
 logger = logging.getLogger(__name__)
 
@@ -117,57 +117,60 @@ def maybe_set_random_value(
         primitive.value = random_data(primitive.metadata.data_type, ndim)
         return
 
-    shape = []
-    for dim, coordinate in enumerate(primitive.metadata.coordinates):
-        same_as = primitive.metadata.coordinates_same_as[dim]
-        if not coordinate.has_validation and not same_as.has_validation:
-            if primitive.metadata.name.endswith("_error_upper"):
-                # _error_upper should only be filled when is
-                name = primitive.metadata.name[: -len("_error_upper")]
-                data = primitive._parent[name]
-                if not 
data.has_value: - return - size = data.shape[dim] - elif primitive.metadata.name.endswith("_error_lower"): - # _error_lower should only be filled when _error_upper is - name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" - data = primitive._parent[name] - if not data.has_value: + for dim, same_as in enumerate(primitive.metadata.coordinates_same_as): + if same_as.references: + try: + ref_elem = same_as.references[0].goto(primitive) + if len(ref_elem.shape) <= dim or ref_elem.shape[dim] == 0: return - size = data.shape[dim] - else: + except (ValueError, AttributeError, IndexError, RuntimeError): + return + + shape = [] + if primitive.metadata.name.endswith("_error_upper"): + name = primitive.metadata.name[: -len("_error_upper")] + data = primitive._parent[name] + if not data.has_value: + return + shape = list(data.shape) + elif primitive.metadata.name.endswith("_error_lower"): + name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" + data = primitive._parent[name] + if not data.has_value: + return + shape = list(data.shape) + else: + for dim, coordinate in enumerate(primitive.metadata.coordinates): + same_as = primitive.metadata.coordinates_same_as[dim] + + if not coordinate.has_validation and not same_as.has_validation: # we can independently choose a size for this dimension: size = random.randint(1, 6) - elif coordinate.references or same_as.references: - try: - if coordinate.references: - refs = [ref.goto(primitive) for ref in coordinate.references] - filled_refs = [ref for ref in refs if len(ref) > 0] - assert len(filled_refs) in (0, 1) - coordinate_element = filled_refs[0] if filled_refs else refs[0] - else: - coordinate_element = same_as.references[0].goto(primitive) - except (ValueError, AttributeError): - # Ignore invalid coordinate specs - coordinate_element = np.ones((1,) * 6) - - if len(coordinate_element) == 0: - # Scale chance of not setting a coordinate by our number of dimensions, - # such that overall there is roughly a 50% chance that any coordinate - # remains empty - maybe_set_random_value(coordinate_element, 0.5**ndim, skip_complex) - size = coordinate_element.shape[0 if coordinate.references else dim] - - if coordinate.size: # coordinateX = OR 1...1 - # Coin flip whether to use the size as determined by - # coordinate.references, or the size from coordinate.size - if random.random() < 0.5: - size = coordinate.size - else: - size = coordinate.size - if size == 0: - return # Leave empty - shape.append(size) + elif coordinate.references or same_as.references: + try: + if coordinate.references: + refs = [ref.goto(primitive) for ref in coordinate.references] + filled_refs = [ref for ref in refs if len(ref) > 0] + assert len(filled_refs) in (0, 1) + coordinate_element = filled_refs[0] if filled_refs else refs[0] + else: + coordinate_element = same_as.references[0].goto(primitive) + except (ValueError, AttributeError, IndexError): + # Ignore invalid coordinate specs or empty array references + coordinate_element = np.ones((1,) * 6) + + if len(coordinate_element) == 0: + maybe_set_random_value(coordinate_element, 0.5**ndim, skip_complex) + size = coordinate_element.shape[0 if coordinate.references else dim] + + if coordinate.size: # coordinateX = OR 1...1 + if random.random() < 0.5: + size = coordinate.size + else: + size = coordinate.size + if size == 0: + return # Leave empty + shape.append(size) if primitive.metadata.data_type is IDSDataType.STR: primitive.value = [random_string() for i in range(shape[0])] @@ -298,21 +301,29 @@ def 
fill_consistent( def unset_coordinate(coordinate): + def unset(element): + # Unset element value + element.value = [] + # But also its errorbars (if they exist) + try: + element._parent[element.metadata.name + "_error_upper"].value = [] + element._parent[element.metadata.name + "_error_lower"].value = [] + except AttributeError: + pass # Ignore when element has no errorbars + # Unset the coordinate quantity - coordinate.value = [] + unset(coordinate) # Find all elements that also have this as a coordinate and unset... parent = coordinate._dd_parent while parent.metadata.data_type is not IDSDataType.STRUCT_ARRAY: parent = parent._dd_parent - def callback(element): + for element in tree_iter(parent): if hasattr(element, "coordinates") and element.has_value: for ele_coor in element.coordinates: if ele_coor is coordinate: - element.value = [] - return - - visit_children(callback, parent) + unset(element) + break def compare_children(st1, st2, deleted_paths=set(), accept_lazy=False): diff --git a/imas/test/test_identifiers.py b/imas/test/test_identifiers.py index 263a6ccf..119e0e88 100644 --- a/imas/test/test_identifiers.py +++ b/imas/test/test_identifiers.py @@ -1,9 +1,18 @@ -import pytest +import importlib.metadata +from packaging.version import Version +import pytest from imas.dd_zip import dd_identifiers from imas.ids_factory import IDSFactory from imas.ids_identifiers import IDSIdentifier, identifiers +has_aliases = Version(importlib.metadata.version("imas_data_dictionaries")) >= Version( + "4.1.0" +) +requires_aliases = pytest.mark.skipif( + not has_aliases, reason="Requires DD 4.1.0 for identifier aliases" +) + def test_list_identifiers(): assert identifiers.identifiers == dd_identifiers() @@ -70,6 +79,66 @@ def test_identifier_struct_assignment(caplog): assert source.identifier != csid.total +def test_identifiers_with_aliases(): + # Custom identifier XML, based on materials identifier, with some more features + custom_identifier_xml = """\ + + +
+<constants>
+<header>Materials used in the device mechanical structures</header>
+<int name="235U" description="Uranium 235 isotope" alias="U_235">20</int>
+<int name="238U" description="Uranium 238 isotope" alias="U_238">21</int>
+<int name="Diamond" description="Diamond">22</int>
+<int name="CxHy" description="Hydrocarbon" alias="alias1,alias2,3alias">23</int>
+</constants>
+""" + identifier = IDSIdentifier._from_xml("custom_identifier", custom_identifier_xml) + + assert len(identifier) == 4 + + # no aliases + assert identifier.Diamond.aliases == [] + # 1 alias + assert identifier["235U"] is identifier.U_235 + assert identifier["235U"].aliases == ["U_235"] + # 3 aliases + assert ( + identifier.CxHy + is identifier.alias1 + is identifier.alias2 + is identifier["3alias"] + ) + assert identifier.CxHy.aliases == ["alias1", "alias2", "3alias"] + + +@requires_aliases +def test_identifier_struct_assignment_with_aliases(): + """Test identifier struct assignment with aliases using materials_identifier.""" + mid = identifiers.materials_identifier + + # Create an actual IDS structure + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.names[0] = mid.U_235.name + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # Basic attribute checks + assert mat.names[0] == mid["235U"].name + assert mat.indices[0] == mid.U_235.index + + # Modify material properties and test equality + mat.names[0] = "some_name" + assert mat.names[0] != mid.U_235.name + + def test_identifier_aos_assignment(): cfid = identifiers.pf_active_coil_function_identifier pfa = IDSFactory("3.39.0").pf_active() @@ -103,3 +172,124 @@ def test_invalid_identifier_assignment(): with pytest.raises(ValueError): # negative identifiers are reserved for user-defined identifiers cs.source[0].identifier = -1 + + +@requires_aliases +def test_identifier_aliases(): + """Test identifier enum aliases functionality.""" + mid = identifiers.materials_identifier + + # Test that alias points to the same object as the canonical name + assert mid.U_235 is mid["235U"] + assert mid.U_238 is mid["238U"] + assert mid.In_115 is mid["115In"] + assert mid.He_4 is mid["4He"] + + # Test that both name and alias have the same properties + assert mid.U_235.name == "235U" + assert mid.U_235.index == mid["235U"].index + assert mid.U_235.description == mid["235U"].description + assert "U_235" in mid.U_235.aliases + assert isinstance(mid.U_235.aliases, list) + + # Test accessing by any alias via bracket notation + for alias in mid.U_235.aliases: + assert mid[alias] is mid.U_235 + + +@requires_aliases +def test_identifier_alias_equality(): + """Test that identifiers with aliases are equal when comparing names and aliases.""" + mid = identifiers.materials_identifier + target = mid.U_235 + + # Test equality with canonical name + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.names[0] = "235U" + assert mat.names[0] == target.name + + # Test equality with alias name + wallids2 = IDSFactory().wall() + wallids2.description_ggd.resize(1) + wallids2.description_ggd[0].material.resize(1) + wallids2.description_ggd[0].material[0].grid_subset.resize(1) + mat2 = wallids2.description_ggd[0].material[0].grid_subset[0].identifiers + mat2.names.extend([""] * 1) + mat2.names[0] = mid["U_235"].name # Use alias as name + assert mat2.names[0] == target.name + + # Test inequality when material has alias not matching canonical 
name
+    wallids3 = IDSFactory().wall()
+    wallids3.description_ggd.resize(1)
+    wallids3.description_ggd[0].material.resize(1)
+    wallids3.description_ggd[0].material[0].grid_subset.resize(1)
+    mat3 = wallids3.description_ggd[0].material[0].grid_subset[0].identifiers
+    mat3.names.extend([""] * 1)
+    mat3.names[0] = "test_name"
+    assert mat3.names[0] != target.name
+
+    # Test that the name still matches when the index does not
+    wallids4 = IDSFactory().wall()
+    wallids4.description_ggd.resize(1)
+    wallids4.description_ggd[0].material.resize(1)
+    wallids4.description_ggd[0].material[0].grid_subset.resize(1)
+    mat4 = wallids4.description_ggd[0].material[0].grid_subset[0].identifiers
+    mat4.names.extend([""] * 1)
+    mat4.indices.resize(1)
+    mat4.names[0] = "235U"
+    mat4.indices[0] = 999
+    assert mat4.indices[0] != target.index
+    assert mat4.names[0] == target.name
+
+    # Test equality for multiple names, indices and descriptions
+    wallids5 = IDSFactory().wall()
+    wallids5.description_ggd.resize(1)
+    wallids5.description_ggd[0].material.resize(1)
+    wallids5.description_ggd[0].material[0].grid_subset.resize(1)
+    mat5 = wallids5.description_ggd[0].material[0].grid_subset[0].identifiers
+    mat5.names.extend([""] * 3)
+    mat5.indices.resize(3)
+    mat5.descriptions.extend([""] * 3)
+    mat5.names[0] = "235U"
+    mat5.names[1] = "238U"
+    mat5.names[2] = mid.U_235.name  # Accessed via alias; .name is the canonical name
+    mat5.indices[0] = 20
+    mat5.indices[1] = 21
+    mat5.indices[2] = 20
+    mat5.descriptions[0] = "Uranium 235 isotope"
+    mat5.descriptions[1] = "Uranium 238 isotope"
+    mat5.descriptions[2] = "Uranium 235 isotope"
+
+    assert mat5.names[0] == mid["235U"].name
+    assert mat5.names[1] == mid["238U"].name
+    assert mat5.names[2] == mid["U_235"].name
+    assert mat5.indices[0] == mid["235U"].index
+    assert mat5.indices[1] == mid["238U"].index
+    assert mat5.indices[2] == mid["U_235"].index
+    assert mat5.descriptions[0] == mid["235U"].description
+    assert mat5.descriptions[1] == mid["238U"].description
+    assert mat5.descriptions[2] == mid["U_235"].description
+
+
+@requires_aliases
+def test_identifier_alias_equality_non_ggd():
+    """Test identifier aliases functionality on non-GGD material"""
+    mid = identifiers.materials_identifier
+
+    summary_ids = IDSFactory().summary()
+    summary_ids.wall.material = mid.U_235  # Assign the enum member via its alias
+    assert summary_ids.wall.material == mid["235U"]
+    assert summary_ids.wall.material == mid["U_235"]
+
+    summary_ids.wall.material.name = "U_235"  # Use alias as name
+    assert summary_ids.wall.material == mid["235U"]
+    assert summary_ids.wall.material == mid["U_235"]
+
+    summary_ids.wall.material.name = "235U"  # Use canonical name
+    assert summary_ids.wall.material == mid["235U"]
+    assert summary_ids.wall.material == mid["U_235"]
diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py
index f2b9b7f7..b79fb1ba 100644
--- a/imas/test/test_ids_convert.py
+++ b/imas/test/test_ids_convert.py
@@ -12,12 +12,14 @@
 from imas import identifiers
 from imas.ids_convert import (
+    _3to4_sign_flip_paths,
     _get_ctxpath,
     _get_tbp,
     convert_ids,
     dd_version_map_from_factories,
     iter_parents,
 )
+from imas.ids_data_type import IDSDataType
 from imas.ids_defs import (
     ASCII_BACKEND,
     IDS_TIME_MODE_HETEROGENEOUS,
@@ -481,3 +483,216 @@ def test_3to4_pulse_schedule_fuzz():
     fill_consistent(ps)
 
     convert_ids(ps, "4.0.0")
+
+
+def test_3to4_migrate_deprecated_fields():  # GH#55
+    # Test the j_tor -> j_phi rename
+    eq342 = IDSFactory("3.42.0").equilibrium()
+    eq342.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    eq342.time = [0.0]
+    eq342.time_slice.resize(1)
+    eq342.time_slice[0].profiles_1d.j_tor = [0.3, 0.2, 0.1]
+    eq342.time_slice[0].profiles_1d.j_tor_error_upper = [1.0]
+    eq342.time_slice[0].profiles_1d.j_tor_error_lower = [2.0]
+    eq342.time_slice[0].profiles_1d.psi = [1.0, 0.5, 0.0]
+
+    # Basic case, check that j_tor (although deprecated) is migrated to j_phi:
+    eq4 = convert_ids(eq342, "4.0.0")
+    assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.3, 0.2, 0.1])
+    assert array_equal(eq4.time_slice[0].profiles_1d.j_phi_error_upper.value, [1.0])
+    assert array_equal(eq4.time_slice[0].profiles_1d.j_phi_error_lower.value, [2.0])
+
+    # When both j_tor and j_phi are present in the source IDS, we expect that j_phi
+    # takes precedence. This is a happy accident with how the DD defines both attributes.
+    eq342.time_slice[0].profiles_1d.j_phi = [0.6, 0.4, 0.2]
+    eq4 = convert_ids(eq342, "4.0.0")
+    assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.6, 0.4, 0.2])
+
+    # Just to be sure: when j_tor has no value, it should also still work
+    del eq342.time_slice[0].profiles_1d.j_tor
+    eq4 = convert_ids(eq342, "4.0.0")
+    assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.6, 0.4, 0.2])
+
+    # The same applies to label -> name renames
+    cp342 = IDSFactory("3.42.0").core_profiles()
+    cp342.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    cp342.time = [0.0]
+    cp342.profiles_1d.resize(1)
+    cp342.profiles_1d[0].ion.resize(1)
+    cp342.profiles_1d[0].ion[0].label = "x"
+
+    cp4 = convert_ids(cp342, "4.0.0")
+    assert cp4.profiles_1d[0].ion[0].name == "x"
+
+    cp342.profiles_1d[0].ion[0].name = "y"
+    cp4 = convert_ids(cp342, "4.0.0")
+    assert cp4.profiles_1d[0].ion[0].name == "y"
+
+    del cp342.profiles_1d[0].ion[0].label
+    cp4 = convert_ids(cp342, "4.0.0")
+    assert cp4.profiles_1d[0].ion[0].name == "y"
+
+
+def test_3to4_name_identifier_mapping_magnetics():
+    # Create source IDS using DD 3.40.1
+    factory = IDSFactory("3.40.1")
+
+    src = factory.magnetics()
+    src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    # Populate a parent that has name + identifier (no 'index' sibling)
+    src.b_field_pol_probe.resize(1)
+    src.b_field_pol_probe[0].name = "TEST_NAME"
+    src.b_field_pol_probe[0].identifier = "TEST_IDENTIFIER"
+
+    # Convert to DD 4.0.0
+    dst = convert_ids(src, "4.0.0")
+
+    # DD3 name -> DD4 description
+    assert dst.b_field_pol_probe[0].description == "TEST_NAME"
+
+    # DD3 identifier -> DD4 name
+    assert dst.b_field_pol_probe[0].name == "TEST_IDENTIFIER"
+
+
+def test_4to3_name_identifier_mapping_magnetics():
+    # Create source IDS using DD 4.0.0
+    factory = IDSFactory("4.0.0")
+
+    src = factory.magnetics()
+    src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    # Populate a parent that has description + name (no 'index' sibling)
+    src.b_field_pol_probe.resize(1)
+    src.b_field_pol_probe[0].description = "TEST_DESCRIPTION"
+    src.b_field_pol_probe[0].name = "TEST_NAME"
+
+    # Convert to DD 3.40.1
+    dst = convert_ids(src, "3.40.1")
+
+    # DD4 description -> DD3 name
+    assert dst.b_field_pol_probe[0].name == "TEST_DESCRIPTION"
+
+    # DD4 name -> DD3 identifier
+    assert dst.b_field_pol_probe[0].identifier == "TEST_NAME"
+
+
+def test_3to4_cocos_hardcoded_paths():
+    # Check for existence in 3.42.0
+    factory = IDSFactory("3.42.0")
+    for ids_name, paths in _3to4_sign_flip_paths.items():
+        ids = factory.new(ids_name)
+        for path in paths:
+            # Check that the path exists and is a FLT
+            metadata = ids.metadata[path]
+            assert metadata.data_type is IDSDataType.FLT
+
+    # Test a conversion
+    eq = factory.equilibrium()
+    eq.time_slice.resize(1)
+    eq.time_slice[0].boundary.psi = 3.141
+
+    eq4 = convert_ids(eq, "4.0.0")
+    assert eq4.time_slice[0].boundary.psi == -3.141
+
+
+def test_3to4_equilibrium_boundary():
+    eq342 = IDSFactory("3.42.0").equilibrium()
+    eq342.time_slice.resize(5)
+
+    for i, ts in enumerate(eq342.time_slice):
+        # Always fill boundary and magnetic axis
+        ts.boundary.psi = 1
+        ts.boundary.outline.r = [1.0, 3.0, 2.0, 1.0]
+        ts.boundary.outline.z = [1.0, 2.0, 3.0, 1.0]
+        ts.global_quantities.psi_axis = 1.0
+        ts.global_quantities.magnetic_axis.r = 2.0
+        ts.global_quantities.magnetic_axis.z = 2.0
+
+        if i > 0:
+            # Fill separatrix
+            ts.boundary_separatrix.psi = -1.0
+            # Use limiter for time_slice[1], otherwise divertor:
+            if i == 1:
+                ts.boundary_separatrix.type = 0
+                ts.boundary_separatrix.active_limiter_point.r = 3.0
+                ts.boundary_separatrix.active_limiter_point.z = 2.0
+            else:
+                ts.boundary_separatrix.type = 1
+                ts.boundary_separatrix.outline.r = [1.0, 3.0, 2.0, 1.0]
+                ts.boundary_separatrix.outline.z = [1.0, 2.0, 3.0, 1.0]
+                ts.boundary_separatrix.x_point.resize(1)
+                ts.boundary_separatrix.x_point[0].r = 1.0
+                ts.boundary_separatrix.x_point[0].z = 1.0
+            # These are not part of the conversion:
+            ts.boundary_separatrix.strike_point.resize(2)
+            ts.boundary_separatrix.closest_wall_point.r = 1.0
+            ts.boundary_separatrix.closest_wall_point.z = 1.0
+            ts.boundary_separatrix.closest_wall_point.distance = 0.2
+            ts.boundary_separatrix.dr_dz_zero_point.r = 3.0
+            ts.boundary_separatrix.dr_dz_zero_point.z = 2.0
+            ts.boundary_separatrix.gap.resize(1)
+        if i == 3:
+            # Fill secondary separatrix
+            ts.boundary_secondary_separatrix.psi = -1.1
+            ts.boundary_secondary_separatrix.outline.r = [0.9, 3.1, 2.1, 0.9]
+            ts.boundary_secondary_separatrix.outline.z = [0.9, 2.1, 3.1, 0.9]
+            ts.boundary_secondary_separatrix.x_point.resize(1)
+            ts.boundary_secondary_separatrix.x_point[0].r = 2.1
+            ts.boundary_secondary_separatrix.x_point[0].z = 3.1
+            # These are not part of the conversion:
+            ts.boundary_secondary_separatrix.distance_inner_outer = 0.1
+            ts.boundary_secondary_separatrix.strike_point.resize(2)
+        if i == 4:
+            ts.boundary_separatrix.x_point.resize(2, keep=True)
+            ts.boundary_separatrix.x_point[1].r = 2.0
+            ts.boundary_separatrix.x_point[1].z = 3.0
+
+    eq4 = convert_ids(eq342, "4.0.0")
+    assert len(eq4.time_slice) == 5
+    for i, ts in enumerate(eq4.time_slice):
+        node = ts.contour_tree.node
+        assert len(node) == [1, 2, 2, 3, 3][i]
+        # Test magnetic axis
+        assert node[0].critical_type == 0
+        assert node[0].r == node[0].z == 2.0
+        assert node[0].psi == -1.0
+        assert len(node[0].levelset.r) == len(node[0].levelset.z) == 0
+        # boundary_separatrix
+        if i == 1:  # node[1] is boundary for limiter plasma
+            assert node[1].critical_type == 2
+            assert node[1].r == 3.0
+            assert node[1].z == 2.0
+        elif i > 1:  # node[1] is boundary for divertor plasma
+            assert node[1].critical_type == 1
+            assert node[1].r == node[1].z == 1.0
+        if i > 0:
+            assert node[1].psi == 1.0
+            assert numpy.array_equal(node[1].levelset.r, [1.0, 3.0, 2.0, 1.0])
+            assert numpy.array_equal(node[1].levelset.z, [1.0, 2.0, 3.0, 1.0])
+        # boundary_secondary_separatrix
+        if i == 3:
+            assert node[2].critical_type == 1
+            assert node[2].r == 2.1
+            assert node[2].z == 3.1
+            assert node[2].psi == 1.1
+            assert numpy.array_equal(node[2].levelset.r, [0.9, 3.1, 2.1, 0.9])
+            assert numpy.array_equal(node[2].levelset.z, [0.9, 2.1, 3.1, 0.9])
+        # Second x-point from boundary_separatrix
+        if i == 4:
+            
assert node[2].critical_type == 1 + assert node[2].r == 2.0 + assert node[2].z == 3.0 + assert node[2].psi == node[1].psi == 1.0 + # Levelset is only filled for the main x-point (node[1]) + assert not node[2].levelset.r.has_value + assert not node[2].levelset.z.has_value + + # not deepcopied, should share numpy arrays + slice1_outline_r = eq342.time_slice[1].boundary_separatrix.outline.r.value + assert slice1_outline_r is eq4.time_slice[1].contour_tree.node[1].levelset.r.value + + # deepcopy should create a copy of the numpy arrays + eq4_cp = convert_ids(eq342, "4.0.0", deepcopy=True) + assert not numpy.may_share_memory( + slice1_outline_r, eq4_cp.time_slice[1].contour_tree.node[1].levelset.r.value + ) diff --git a/imas/test/test_lazy_loading.py b/imas/test/test_lazy_loading.py index ff241016..4a7c65ca 100644 --- a/imas/test/test_lazy_loading.py +++ b/imas/test/test_lazy_loading.py @@ -224,10 +224,12 @@ def test_lazy_load_with_new_structure(requires_imas): eq.time_slice.resize(1) dbentry.put(eq) - entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0") + entry2 = DBEntry( + MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="3.39.0" + ) entry2.open() lazy_eq = entry2.get("equilibrium", lazy=True) - assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value + assert not lazy_eq.time_slice[0].boundary_separatrix.dr_dz_zero_point.r.has_value def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): diff --git a/imas/test/test_time_slicing.py b/imas/test/test_time_slicing.py index 60788689..21c689f4 100644 --- a/imas/test/test_time_slicing.py +++ b/imas/test/test_time_slicing.py @@ -3,18 +3,15 @@ """ import logging -import os import numpy as np import pytest -from imas.backends.imas_core.mdsplus_model import ensure_data_dir, mdsplus_model_dir from imas.ids_defs import ( ASCII_BACKEND, CLOSEST_INTERP, IDS_TIME_MODE_HETEROGENEOUS, IDS_TIME_MODE_HOMOGENEOUS, - MDSPLUS_BACKEND, ) from imas.ids_factory import IDSFactory from imas.test.test_helpers import open_dbentry @@ -97,10 +94,6 @@ def test_hli_time_slicing_put(backend, worker_id, tmp_path, time_mode): else: pulse = int(worker_id[2:]) + 1 - # ensure presence of mdsplus model dir - if backend == MDSPLUS_BACKEND: - os.environ["ids_path"] = mdsplus_model_dir(IDSFactory()) - ensure_data_dir(str(tmp_path), "test", "3", 9999) db_entry = imas.DBEntry(backend, "test", pulse, 9999, user_name=str(tmp_path)) status, ctx = db_entry.create() if status != 0: diff --git a/imas/training.py b/imas/training.py index 9c4df602..6effcc5b 100644 --- a/imas/training.py +++ b/imas/training.py @@ -1,9 +1,7 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Functions that are useful for the IMAS-Python training courses. 
-""" +"""Functions that are useful for the IMAS-Python training courses.""" -import importlib from unittest.mock import patch try: @@ -12,34 +10,17 @@ from importlib_resources import files import imas -from imas.backends.imas_core.imas_interface import ll_interface -def _initialize_training_db(DBEntry_cls): +def get_training_db_entry() -> imas.DBEntry: + """Open and return an ``imas.DBEntry`` pointing to the training data.""" assets_path = files(imas) / "assets/" - pulse, run, user, database = 134173, 106, "public", "ITER" - if ll_interface._al_version.major == 4: - entry = DBEntry_cls(imas.ids_defs.ASCII_BACKEND, database, pulse, run, user) - entry.open(options=f"-prefix {assets_path}/") - else: - entry = DBEntry_cls(f"imas:ascii?path={assets_path}", "r") + entry = imas.DBEntry(f"imas:ascii?path={assets_path}", "r") - output_entry = DBEntry_cls(imas.ids_defs.MEMORY_BACKEND, database, pulse, run) - output_entry.create() + output_entry = imas.DBEntry("imas:memory?path=/", "w") for ids_name in ["core_profiles", "equilibrium"]: - ids = entry.get(ids_name) + ids = entry.get(ids_name, autoconvert=False) with patch.dict("os.environ", {"IMAS_AL_DISABLE_VALIDATE": "1"}): - output_entry.put(ids) + output_entry.put(imas.convert_ids(ids, output_entry.dd_version)) entry.close() return output_entry - - -def get_training_db_entry() -> imas.DBEntry: - """Open and return an ``imas.DBEntry`` pointing to the training data.""" - return _initialize_training_db(imas.DBEntry) - - -def get_training_imas_db_entry(): - """Open and return an ``imas.DBEntry`` pointing to the training data.""" - imas = importlib.import_module("imas") - return _initialize_training_db(imas.DBEntry) diff --git a/pyproject.toml b/pyproject.toml index 066e0ea9..ed3f964e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dependencies = [ "packaging", "xxhash >= 2", "imas_data_dictionaries", + "imas_core" ] [project.optional-dependencies]