From 241adc4a2179714a32379f4006e3cd6c83592307 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Sat, 7 Feb 2026 04:52:50 -0500 Subject: [PATCH 1/5] feat: Add zarr v3 format support with zarr-python v3 library Migrate from zarr-python v2 to v3 library (zarr>=3) and add support for reading/writing both zarr v2 and v3 on-disk formats. Adds backend="zarr3" option with Zarr3Reader/Zarr3Writer classes, detect_zarr_format() utility for auto-detection, and filesystem-based _list_zarr_children() fallback for v2 store enumeration under the new library. Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 4 +- src/bfio/backends.py | 274 ++++++++++++++++++++++++++++++++++++--- src/bfio/bfio.py | 72 ++++++++++- src/bfio/ts_backends.py | 46 +++++-- src/bfio/utils.py | 52 ++++++++ tests/test_zarr_v3.py | 280 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 690 insertions(+), 38 deletions(-) create mode 100644 tests/test_zarr_v3.py diff --git a/pyproject.toml b/pyproject.toml index dc8556e..461fc92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires = [ "imagecodecs>=2021.2.26", "numpy", "ome-types>=0.4.2", - "zarr>=2.6.1,<3", + "zarr>=3", "scyjava", "jpype1", "tifffile>=2022.8.12", @@ -23,7 +23,7 @@ dependencies = [ "imagecodecs>=2021.2.26", "numpy", "ome-types>=0.4.2", - "zarr>=2.6.1,<3", + "zarr>=3", "scyjava", "jpype1", "tifffile>=2022.8.12", diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 67c60bb..881e0c8 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -20,7 +20,7 @@ # bfio internals from bfio import __version__ as version import bfio.base_classes -from bfio.utils import start, clean_ome_xml_for_known_issues, pixels_per_cm +from bfio.utils import start, clean_ome_xml_for_known_issues, pixels_per_cm, detect_zarr_format logging.basicConfig( format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", @@ -1211,6 +1211,30 @@ def __init__(self, frontend): import zarr from numcodecs import Blosc + def 
_list_zarr_children(path, child_type="array"): + """Filesystem-based fallback for enumerating zarr v2 store children. + + In zarr-python v3, array_keys()/group_keys() may not reliably + enumerate v2 format stores. This function checks subdirectories + for .zarray (arrays) or .zgroup (groups) marker files. + + Args: + path: Path to the zarr group directory (str or Path). + child_type: "array" to find arrays (.zarray), "group" to find + groups (.zgroup). + + Returns: + Sorted list of child names. + """ + p = Path(path) + marker = ".zarray" if child_type == "array" else ".zgroup" + children = [] + if p.is_dir(): + for child in p.iterdir(): + if child.is_dir() and (child / marker).exists(): + children.append(child.name) + return sorted(children) + class ZarrReader(bfio.base_classes.AbstractReader): logger = logging.getLogger("bfio.backends.ZarrReader") @@ -1224,34 +1248,40 @@ def __init__(self, frontend): self._root = zarr.open( str(self.frontend._file_path.resolve()), mode="r" ) - except zarr.errors.PathNotFoundError: + except (FileNotFoundError, KeyError): # a workaround for pre-compute slide output directory structure data_zarr_path = str(self.frontend._file_path.resolve()) + "/data.zarr" self._root = zarr.open(data_zarr_path, mode="r") + store_path = str(self.frontend._file_path.resolve()) + if self.frontend.level is None: - if isinstance(self._root, zarr.core.Array): + if isinstance(self._root, zarr.Array): self._rdr = self._root - elif isinstance(self._root, zarr.hierarchy.Group): + elif isinstance(self._root, zarr.Group): # the top level is a group, check if this has any arrays - num_arrays = len(sorted(self._root.array_keys())) - if num_arrays > 0: - self._rdr = self._root[next(self._root.array_keys())] + array_keys = _list_zarr_children(store_path, "array") + if len(array_keys) > 0: + self._rdr = self._root[array_keys[0]] else: # need to go one more level - self._root = self._root[next(self._root.group_keys())] - self._rdr = 
self._root[next(self._root.array_keys())] + group_keys = _list_zarr_children(store_path, "group") + self._root = self._root[group_keys[0]] + sub_path = str(Path(store_path) / group_keys[0]) + sub_array_keys = _list_zarr_children(sub_path, "array") + self._rdr = self._root[sub_array_keys[0]] else: pass else: - if isinstance(self._root, zarr.core.Array): + if isinstance(self._root, zarr.Array): self.close() raise ValueError( "Level is specified but the zarr file does not contain " + "multiple resoulutions." ) - elif isinstance(self._root, zarr.hierarchy.Group): - if len(sorted(self._root.array_keys())) > self.frontend.level: + elif isinstance(self._root, zarr.Group): + array_keys = _list_zarr_children(store_path, "array") + if len(array_keys) > self.frontend.level: self._rdr = self._root[self.frontend.level] else: raise ValueError( @@ -1279,7 +1309,7 @@ def _get_axis_info(self): axes_metadata = self._root.attrs["multiscales"][data_key]["axes"] for axes in axes_metadata: self._axes_list.append(axes["name"]) - except AttributeError or KeyError: + except (AttributeError, KeyError): self.logger.warning( "Unable to find multiscales metadata. Z, C and T " + "dimensions might be incorrect." 
@@ -1457,7 +1487,9 @@ def _init_writer(self): if self.frontend.append is True: mode = "a" self._root = zarr.open_group( - store=str(self.frontend._file_path.resolve()), mode=mode + store=str(self.frontend._file_path.resolve()), + mode=mode, + zarr_format=2, ) # Create the metadata @@ -1482,14 +1514,16 @@ def _init_writer(self): "metadata": {"method": "mean"}, } ] + + store_path = str(self.frontend._file_path.resolve()) if ( self.frontend.append is True - and len(sorted(self._root.array_keys())) > 0 + and len(_list_zarr_children(store_path, "array")) > 0 ): writer = self._root["0"] else: - writer = self._root.zeros( - "0", + writer = self._root.create_array( + name="0", shape=shape, chunks=( 1, @@ -1499,8 +1533,8 @@ def _init_writer(self): self.frontend._TILE_SIZE, ), dtype=self.frontend.dtype, - compressor=compressor, - dimension_separator="/", + compressors=compressor, + fill_value=0, ) # This is recommended to do for cloud storage to increase read/write @@ -1552,6 +1586,196 @@ def _write_image(self, X, Y, Z, C, T, image): def close(self): pass + class Zarr3Reader(ZarrReader): + """Reader for zarr v3 format stores using zarr-python v3 API.""" + + logger = logging.getLogger("bfio.backends.Zarr3Reader") + + def __init__(self, frontend): + # Call AbstractReader.__init__ directly, skip ZarrReader.__init__ + bfio.base_classes.AbstractReader.__init__(self, frontend) + + self.logger.debug("__init__(): Initializing _rdr (zarr v3)...") + self.logger.debug(f"Level is {self.frontend.level}") + + try: + self._root = zarr.open( + str(self.frontend._file_path.resolve()), mode="r" + ) + except (FileNotFoundError, KeyError): + data_zarr_path = str(self.frontend._file_path.resolve()) + "/data.zarr" + self._root = zarr.open(data_zarr_path, mode="r") + + if self.frontend.level is None: + if isinstance(self._root, zarr.Array): + self._rdr = self._root + elif isinstance(self._root, zarr.Group): + # Use native v3 group enumeration + array_names = sorted( + k for k, v in 
self._root.members() + if isinstance(v, zarr.Array) + ) + if len(array_names) > 0: + self._rdr = self._root[array_names[0]] + else: + group_names = sorted( + k for k, v in self._root.members() + if isinstance(v, zarr.Group) + ) + self._root = self._root[group_names[0]] + array_names = sorted( + k for k, v in self._root.members() + if isinstance(v, zarr.Array) + ) + self._rdr = self._root[array_names[0]] + else: + pass + else: + if isinstance(self._root, zarr.Array): + self.close() + raise ValueError( + "Level is specified but the zarr file does not contain " + + "multiple resoulutions." + ) + elif isinstance(self._root, zarr.Group): + array_names = sorted( + k for k, v in self._root.members() + if isinstance(v, zarr.Array) + ) + if len(array_names) > self.frontend.level: + self._rdr = self._root[self.frontend.level] + else: + raise ValueError( + "The zarr file does not contain resolution " + + "level {}.".format(self.frontend.level) + ) + else: + raise ValueError( + "The zarr file does not contain resolution level {}.".format( + self.frontend.level + ) + ) + + self._axes_list = [] + + def read_metadata(self): + self.logger.debug("read_metadata(): Reading metadata (v3)...") + # First try OME metadata (same as v2) + metadata_path = self.frontend._file_path.joinpath("OME").joinpath( + "METADATA.ome.xml" + ) + if metadata_path.exists(): + if self._metadata is None: + with open(metadata_path) as fr: + metadata = fr.read() + + try: + self._metadata = ome_types.from_xml(metadata, validate=False) + except ET.ParseError: + if self.frontend.clean_metadata: + cleaned = clean_ome_xml_for_known_issues(metadata) + self._metadata = ome_types.from_xml(cleaned, validate=False) + self.logger.warning( + "read_metadata(): OME XML required reformatting." 
+ ) + else: + raise + + if self.frontend.level is not None: + self._metadata.images[0].pixels.size_x = self._rdr.shape[-1] + self._metadata.images[0].pixels.size_y = self._rdr.shape[-2] + + return self._metadata + + # Fall back to constructing metadata from array shape + return super().read_metadata() + + class Zarr3Writer(ZarrWriter): + """Writer for zarr v3 format stores using zarr-python v3 API.""" + + logger = logging.getLogger("bfio.backends.Zarr3Writer") + + def __init__(self, frontend): + super().__init__(frontend) + + def _init_writer(self): + """Initialize file writing for zarr v3 format.""" + if self.frontend.append is False: + if self.frontend._file_path.exists(): + shutil.rmtree(self.frontend._file_path) + + shape = ( + self.frontend.T, + self.frontend.C, + self.frontend.Z, + self.frontend.Y, + self.frontend.X, + ) + + mode = "w" + if self.frontend.append is True: + mode = "a" + + self._root = zarr.open_group( + store=str(self.frontend._file_path.resolve()), + mode=mode, + zarr_format=3, + ) + + # Create the metadata + metadata_path = ( + Path(self.frontend._file_path) + .joinpath("OME") + .joinpath("METADATA.ome.xml") + ) + + if self.frontend.append is False or ( + self.frontend.append is True and metadata_path.exists() is False + ): + metadata_path.parent.mkdir(parents=True, exist_ok=True) + with open(metadata_path, "w") as fw: + fw.write(str(self.frontend._metadata.to_xml())) + + self._root.attrs["multiscales"] = [ + { + "version": "0.1", + "name": self.frontend._file_path.name, + "datasets": [{"path": "0"}], + "metadata": {"method": "mean"}, + } + ] + + # Check for existing arrays when appending + if self.frontend.append is True: + existing_arrays = sorted( + k for k, v in self._root.members() + if isinstance(v, zarr.Array) + ) + if len(existing_arrays) > 0: + writer = self._root["0"] + self._writer = writer + return + + writer = self._root.create_array( + name="0", + shape=shape, + chunks=( + 1, + 1, + 1, + self.frontend._TILE_SIZE, + 
self.frontend._TILE_SIZE, + ), + dtype=self.frontend.dtype, + serializer=zarr.codecs.BytesCodec(), + compressors=zarr.codecs.ZstdCodec(level=1), + fill_value=0, + ) + + # Skip zarr.consolidate_metadata() — not part of v3 spec + + self._writer = writer + except ModuleNotFoundError: logger.info( "Zarr backend is not available. This could be due to a " @@ -1569,3 +1793,15 @@ def __init__(self, frontend): raise ImportError( "ZarrWriter class unavailable. Could not import" + " zarr." ) + + class Zarr3Reader(bfio.base_classes.AbstractReader): + def __init__(self, frontend): + raise ImportError( + "Zarr3Reader class unavailable. Could not import zarr." + ) + + class Zarr3Writer(bfio.base_classes.AbstractWriter): + def __init__(self, frontend): + raise ImportError( + "Zarr3Writer class unavailable. Could not import zarr." + ) diff --git a/src/bfio/bfio.py b/src/bfio/bfio.py index 9abe4a6..9d0ec40 100644 --- a/src/bfio/bfio.py +++ b/src/bfio/bfio.py @@ -13,6 +13,7 @@ from bfio import backends from bfio.base_classes import BioBase from bfio.ts_backends import TensorstoreReader, TensorstoreWriter +from bfio.utils import detect_zarr_format class BioReader(BioBase): @@ -125,9 +126,17 @@ def __init__( "Setting max_workers to 1, since max_workers==2 runs slower." + "To change back, set the object property." ) + elif self._backend_name == "zarr3": + self._backend = backends.Zarr3Reader(self) + if self._max_workers == 2: + self._max_workers = 1 + self.logger.debug( + "Setting max_workers to 1, since max_workers==2 runs slower." + + "To change back, set the object property." 
+ ) else: raise ValueError( - 'backend must be "python", "bioformats", "tensorstore" or "zarr"' + 'backend must be "python", "bioformats", "tensorstore", "zarr", or "zarr3"' ) self.logger.debug("Finished initializing the backend.") @@ -180,6 +189,9 @@ def auto_select_backend(self, filename: str) -> str: ) return "bioformats" else: + fmt = detect_zarr_format(self._file_path) + if fmt == 3: + return "zarr3" return "zarr" else: return "bioformats" @@ -190,11 +202,12 @@ def set_backend(self, backend: typing.Optional[str] = None) -> None: "python", "bioformats", "zarr", + "zarr3", "tensorstore", ]: raise ValueError( "Keyword argument backend must be one of" - + '["python", "bioformats", "zarr", "tensorstore"]' + + '["python", "bioformats", "zarr", "zarr3", "tensorstore"]' ) # if backend not given, set backend @@ -230,6 +243,15 @@ def set_backend(self, backend: typing.Optional[str] = None) -> None: ) backend = self.auto_select_backend(self._file_path) + elif backend == "zarr3": + # make sure it is a directory + if not Path.is_dir(self._file_path): + self.logger.warning( + "Zarr3 backend is selected but the path is not a directory," + + " switching to bioformats backend." 
+ ) + backend = self.auto_select_backend(self._file_path) + self._backend_name = backend.lower() def __getstate__(self) -> typing.Dict: @@ -895,6 +917,18 @@ def find_file_recursive(directory_path, filename): # Handle a zarr file if filepath.name.endswith(".zarr"): + # Check for zarr v3 format (zarr.json) + zarr_json_path = find_file_recursive(filepath, "zarr.json") + if zarr_json_path is not None: + with open(zarr_json_path, "r") as fr: + zarr_meta = json.load(fr) + if zarr_meta.get("node_type") == "array" and "shape" in zarr_meta: + shape = zarr_meta["shape"] + height = shape[3] + width = shape[4] + return width, height + + # Check for zarr v2 format (.zarray) zarray_path = find_file_recursive(filepath, ".zarray") if zarray_path is not None: with open(zarray_path, "r") as fr: @@ -1127,8 +1161,18 @@ class if specified. *Defaults to None.* "Setting max_workers to 1, since max_workers==2 runs slower." + "To change back, set the object property." ) + elif self._backend_name == "zarr3": + self._backend = backends.Zarr3Writer(self) + if self._max_workers == 2: + self._max_workers = 1 + self.logger.debug( + "Setting max_workers to 1, since max_workers==2 runs slower." + + "To change back, set the object property." 
+ ) else: - raise ValueError('backend must be "python", "bioformats", or "zarr"') + raise ValueError( + 'backend must be "python", "bioformats", "tensorstore", "zarr", or "zarr3"' + ) if not self._file_path.name.endswith( ".ome.tif" @@ -1154,11 +1198,12 @@ def set_backend(self, backend: typing.Optional[str] = None) -> None: "python", "bioformats", "zarr", + "zarr3", "tensorstore", ]: raise ValueError( "Keyword argument backend must be one of " - + '["python","bioformats","zarr","tensorstore"]' + + '["python","bioformats","zarr","zarr3","tensorstore"]' ) if backend == "python": extension = "".join(self._file_path.suffixes) @@ -1178,6 +1223,15 @@ def set_backend(self, backend: typing.Optional[str] = None) -> None: ) backend = "bioformats" + if backend == "zarr3": + # make sure we can create a directory + if Path.exists(self._file_path) and Path.is_file(self._file_path): + self.logger.warning( + "Zarr3 backend is selected but a file with same pathname exist," + + " switching to bioformats backend." 
+ ) + backend = "bioformats" + if backend is None: extension = "".join(self._file_path.suffixes) if extension.endswith(".ome.tif") or extension.endswith(".ome.tiff"): @@ -1191,7 +1245,15 @@ def set_backend(self, backend: typing.Optional[str] = None) -> None: ) backend = "bioformats" else: - backend = "zarr" + # Auto-detect format for existing stores + if Path.exists(self._file_path): + fmt = detect_zarr_format(self._file_path) + if fmt == 3: + backend = "zarr3" + else: + backend = "zarr" + else: + backend = "zarr" else: backend = "bioformats" diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index fe5e3c9..51b4e98 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -65,6 +65,18 @@ def __init__(self, frontend): self.T = self._rdr._T self.data_type = self._rdr._datatype + def _list_zarr_children(self, path, child_type="array"): + """Filesystem-based fallback for enumerating zarr v2 store children.""" + from pathlib import Path as _Path + p = _Path(path) + marker = ".zarray" if child_type == "array" else ".zgroup" + children = [] + if p.is_dir(): + for child in p.iterdir(): + if child.is_dir() and (child / marker).exists(): + children.append(child.name) + return sorted(children) + def get_zarr_array_info(self): self.logger.debug(f"Level is {self.frontend.level}") @@ -72,20 +84,21 @@ def get_zarr_array_info(self): root_path = self.frontend._file_path try: root = zarr.open(str(root_path.resolve()), mode="r") - except zarr.errors.PathNotFoundError: + except (FileNotFoundError, KeyError): # a workaround for pre-compute slide output directory structure root_path = self.frontend._file_path / "data.zarr" root = zarr.open(root_path.resolve(), mode="r") axes_list = "" + store_path = str(root_path.resolve()) if self.frontend.level is None: - if isinstance(root, zarr.core.Array): + if isinstance(root, zarr.Array): return str(root_path.resolve()), axes_list - elif isinstance(root, zarr.hierarchy.Group): + elif isinstance(root, zarr.Group): # the top 
level is a group, check if this has any arrays - num_arrays = len(sorted(root.array_keys())) - if num_arrays > 0: - array_key = next(root.array_keys()) + array_keys = self._list_zarr_children(store_path, "array") + if len(array_keys) > 0: + array_key = array_keys[0] root_path = root_path / str(array_key) try: axes_metadata = root.attrs["multiscales"][0]["axes"] @@ -101,7 +114,8 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list else: # need to go one more level - group_key = next(root.group_keys()) + group_keys = self._list_zarr_children(store_path, "group") + group_key = group_keys[0] root = root[group_key] try: axes_metadata = root.attrs["multiscales"][0]["axes"] @@ -114,20 +128,23 @@ def get_zarr_array_info(self): + "dimensions might be incorrect." ) - array_key = next(root.array_keys()) + sub_path = str(Path(store_path) / group_key) + sub_array_keys = self._list_zarr_children(sub_path, "array") + array_key = sub_array_keys[0] root_path = root_path / str(group_key) / str(array_key) return str(root_path.resolve()), axes_list else: return str(root_path.resolve()), axes_list else: - if isinstance(root, zarr.core.Array): + if isinstance(root, zarr.Array): self.close() raise ValueError( "Level is specified but the zarr file does not contain " + "multiple resoulutions." ) - elif isinstance(root, zarr.hierarchy.Group): - if len(sorted(root.array_keys())) > self.frontend.level: + elif isinstance(root, zarr.Group): + array_keys = self._list_zarr_children(store_path, "array") + if len(array_keys) > self.frontend.level: root_path = root_path / str(self.frontend.level) try: axes_metadata = root.attrs["multiscales"][0]["axes"] @@ -367,7 +384,12 @@ def write_metadata(self): # This is recommended to do for cloud storage to increase read/write # speed, but it also increases write speed locally when threading. 
- zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) + try: + zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) + except Exception: + self.logger.debug( + "Could not consolidate zarr metadata, continuing without it." + ) def write_image(self, X, Y, Z, C, T, image): diff --git a/src/bfio/utils.py b/src/bfio/utils.py index f0df213..dbd7d47 100644 --- a/src/bfio/utils.py +++ b/src/bfio/utils.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # import core packages +import json +import pathlib import scyjava import copy import logging @@ -443,3 +445,53 @@ def pixels_per_cm( pixels_per_cm = image_dim_px / physical_dim_cm return int(pixels_per_cm) + + +def detect_zarr_format(path): + """Detect the zarr format version of a store on disk. + + Checks for zarr.json (v3) or .zgroup/.zarray (v2) marker files. + + Args: + path: Path to the zarr store directory (str or Path). + + Returns: + int: 3 for zarr v3 format, 2 for zarr v2 format, 0 if unknown. + """ + p = pathlib.Path(path) + if not p.is_dir(): + return 0 + + # Check for v3 marker at root + zarr_json = p / "zarr.json" + if zarr_json.exists(): + try: + with open(zarr_json) as f: + meta = json.load(f) + if meta.get("zarr_format") == 3: + return 3 + except (json.JSONDecodeError, OSError): + pass + + # Check for v2 markers at root + if (p / ".zgroup").exists() or (p / ".zarray").exists(): + return 2 + + # Recurse one level into subdirectories + try: + for child in sorted(p.iterdir()): + if child.is_dir(): + if (child / "zarr.json").exists(): + try: + with open(child / "zarr.json") as f: + meta = json.load(f) + if meta.get("zarr_format") == 3: + return 3 + except (json.JSONDecodeError, OSError): + pass + if (child / ".zgroup").exists() or (child / ".zarray").exists(): + return 2 + except OSError: + pass + + return 0 diff --git a/tests/test_zarr_v3.py b/tests/test_zarr_v3.py new file mode 100644 index 0000000..d70e68e --- /dev/null +++ b/tests/test_zarr_v3.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- 
+"""Tests for zarr v3 support in bfio using unittest.""" +import json +import tempfile +import unittest +from pathlib import Path + +import numpy + +from bfio.utils import detect_zarr_format + + +class TestZarrFormatDetection(unittest.TestCase): + """Test detect_zarr_format() utility.""" + + def test_detect_v2_format_zgroup(self): + """Detect zarr v2 format via .zgroup marker.""" + with tempfile.TemporaryDirectory() as tmp: + store = Path(tmp) / "test.zarr" + store.mkdir() + (store / ".zgroup").write_text('{"zarr_format": 2}') + self.assertEqual(detect_zarr_format(store), 2) + + def test_detect_v2_format_zarray(self): + """Detect zarr v2 format via .zarray marker.""" + with tempfile.TemporaryDirectory() as tmp: + store = Path(tmp) / "test.zarr" + store.mkdir() + (store / ".zarray").write_text('{"zarr_format": 2}') + self.assertEqual(detect_zarr_format(store), 2) + + def test_detect_v3_format(self): + """Detect zarr v3 format via zarr.json marker.""" + with tempfile.TemporaryDirectory() as tmp: + store = Path(tmp) / "test.zarr" + store.mkdir() + with open(store / "zarr.json", "w") as f: + json.dump({"zarr_format": 3, "node_type": "group"}, f) + self.assertEqual(detect_zarr_format(store), 3) + + def test_detect_v3_format_in_subdir(self): + """Detect zarr v3 format via zarr.json in a subdirectory.""" + with tempfile.TemporaryDirectory() as tmp: + store = Path(tmp) / "test.zarr" + store.mkdir() + subdir = store / "0" + subdir.mkdir() + with open(subdir / "zarr.json", "w") as f: + json.dump({"zarr_format": 3, "node_type": "array"}, f) + self.assertEqual(detect_zarr_format(store), 3) + + def test_detect_empty_dir(self): + """Return 0 for empty directory.""" + with tempfile.TemporaryDirectory() as tmp: + store = Path(tmp) / "test.zarr" + store.mkdir() + self.assertEqual(detect_zarr_format(store), 0) + + def test_detect_nonexistent(self): + """Return 0 for nonexistent path.""" + with tempfile.TemporaryDirectory() as tmp: + self.assertEqual(detect_zarr_format(Path(tmp) / 
"nonexistent"), 0) + + +class TestZarr3Writer(unittest.TestCase): + """Test Zarr3Writer creates v3 format stores.""" + + def test_write_v3_creates_zarr_json(self): + """Verify v3 writer creates zarr.json marker files.""" + from bfio import BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output.zarr" + data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) + + bw = BioWriter( + out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 + ) + bw[:128, :128, 0, 0, 0] = data + bw.close() + + self.assertTrue((out_path / "zarr.json").exists()) + with open(out_path / "zarr.json") as f: + meta = json.load(f) + self.assertEqual(meta.get("zarr_format"), 3) + + def test_write_v3_ome_metadata(self): + """Verify v3 writer creates OME metadata.""" + from bfio import BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output.zarr" + data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) + + bw = BioWriter( + out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 + ) + bw[:128, :128, 0, 0, 0] = data + bw.close() + + metadata_path = out_path / "OME" / "METADATA.ome.xml" + self.assertTrue(metadata_path.exists()) + + +class TestZarr3Reader(unittest.TestCase): + """Test Zarr3Reader reads v3 format stores.""" + + def test_read_v3_data(self): + """Write v3 then read back, verify data integrity.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output.zarr" + data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) + + bw = BioWriter( + out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 + ) + bw[:128, :128, 0, 0, 0] = data + bw.close() + + br = BioReader(out_path, backend="zarr3") + read_data = br[:128, :128] + br.close() + + numpy.testing.assert_array_equal(data, read_data) + + def test_read_v3_dimensions(self): + """Verify dimensions are correctly read from v3 store.""" + from bfio import BioReader, BioWriter + 
+ with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output.zarr" + bw = BioWriter( + out_path, + backend="zarr3", + X=256, + Y=128, + Z=2, + C=3, + T=1, + dtype=numpy.uint16, + ) + data = numpy.zeros((128, 256, 2, 3, 1), dtype=numpy.uint16) + bw.write(data) + bw.close() + + br = BioReader(out_path, backend="zarr3") + self.assertEqual(br.X, 256) + self.assertEqual(br.Y, 128) + self.assertEqual(br.Z, 2) + self.assertEqual(br.C, 3) + self.assertEqual(br.T, 1) + br.close() + + +class TestZarrAutoBackendSelection(unittest.TestCase): + """Test auto-detection picks correct backend.""" + + def test_auto_detect_v2(self): + """Verify auto-detect picks 'zarr' for v2 format.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output_v2.zarr" + data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) + + bw = BioWriter( + out_path, backend="zarr", X=128, Y=128, dtype=numpy.uint8 + ) + bw[:128, :128, 0, 0, 0] = data + bw.close() + + br = BioReader(out_path) + self.assertEqual(br._backend_name, "zarr") + br.close() + + def test_auto_detect_v3(self): + """Verify auto-detect picks 'zarr3' for v3 format.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "output_v3.zarr" + data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) + + bw = BioWriter( + out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 + ) + bw[:128, :128, 0, 0, 0] = data + bw.close() + + br = BioReader(out_path) + self.assertEqual(br._backend_name, "zarr3") + br.close() + + +class TestZarrRoundTrip(unittest.TestCase): + """Test write-read roundtrip for both v2 and v3 formats.""" + + def test_roundtrip_v2(self): + """Write and read back data with zarr v2, verify integrity.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "roundtrip_zarr.zarr" + data = numpy.random.randint(0, 65535, 
(64, 64), dtype=numpy.uint16) + + bw = BioWriter( + out_path, backend="zarr", X=64, Y=64, dtype=numpy.uint16 + ) + bw[:64, :64, 0, 0, 0] = data + bw.close() + + br = BioReader(out_path, backend="zarr") + read_data = br[:64, :64] + br.close() + + numpy.testing.assert_array_equal(data, read_data) + + def test_roundtrip_v3(self): + """Write and read back data with zarr v3, verify integrity.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "roundtrip_zarr3.zarr" + data = numpy.random.randint(0, 65535, (64, 64), dtype=numpy.uint16) + + bw = BioWriter( + out_path, backend="zarr3", X=64, Y=64, dtype=numpy.uint16 + ) + bw[:64, :64, 0, 0, 0] = data + bw.close() + + br = BioReader(out_path, backend="zarr3") + read_data = br[:64, :64] + br.close() + + numpy.testing.assert_array_equal(data, read_data) + + +class TestImageSizeV3(unittest.TestCase): + """Test BioReader.image_size() works with both v2 and v3 format.""" + + def test_image_size_v3(self): + """Verify image_size() reads dimensions from v3 store.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "size_test.zarr" + bw = BioWriter( + out_path, backend="zarr3", X=256, Y=128, dtype=numpy.uint8 + ) + data = numpy.zeros((128, 256, 1, 1, 1), dtype=numpy.uint8) + bw.write(data) + bw.close() + + width, height = BioReader.image_size(out_path) + self.assertEqual(width, 256) + self.assertEqual(height, 128) + + def test_image_size_v2(self): + """Verify image_size() still works with v2 stores.""" + from bfio import BioReader, BioWriter + + with tempfile.TemporaryDirectory() as tmp: + out_path = Path(tmp) / "size_test_v2.zarr" + bw = BioWriter( + out_path, backend="zarr", X=256, Y=128, dtype=numpy.uint8 + ) + data = numpy.zeros((128, 256, 1, 1, 1), dtype=numpy.uint8) + bw.write(data) + bw.close() + + width, height = BioReader.image_size(out_path) + self.assertEqual(width, 256) + self.assertEqual(height, 
128) + + +if __name__ == "__main__": + unittest.main() From 2640cdae899645a5603d7f0feb8c53e1da37d647 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Wed, 11 Feb 2026 14:25:42 -0500 Subject: [PATCH 2/5] fix formatting --- src/bfio/backends.py | 29 ++++++++++++++--------------- src/bfio/bfio.py | 19 +++++++++++-------- src/bfio/ts_backends.py | 5 +++-- tests/test_read.py | 3 ++- tests/test_zarr_v3.py | 36 +++++++++--------------------------- 5 files changed, 39 insertions(+), 53 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 881e0c8..9df8ef5 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -20,7 +20,11 @@ # bfio internals from bfio import __version__ as version import bfio.base_classes -from bfio.utils import start, clean_ome_xml_for_known_issues, pixels_per_cm, detect_zarr_format +from bfio.utils import ( + start, + clean_ome_xml_for_known_issues, + pixels_per_cm, +) logging.basicConfig( format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", @@ -1612,19 +1616,20 @@ def __init__(self, frontend): elif isinstance(self._root, zarr.Group): # Use native v3 group enumeration array_names = sorted( - k for k, v in self._root.members() - if isinstance(v, zarr.Array) + k for k, v in self._root.members() if isinstance(v, zarr.Array) ) if len(array_names) > 0: self._rdr = self._root[array_names[0]] else: group_names = sorted( - k for k, v in self._root.members() + k + for k, v in self._root.members() if isinstance(v, zarr.Group) ) self._root = self._root[group_names[0]] array_names = sorted( - k for k, v in self._root.members() + k + for k, v in self._root.members() if isinstance(v, zarr.Array) ) self._rdr = self._root[array_names[0]] @@ -1639,8 +1644,7 @@ def __init__(self, frontend): ) elif isinstance(self._root, zarr.Group): array_names = sorted( - k for k, v in self._root.members() - if isinstance(v, zarr.Array) + k for k, v in self._root.members() if isinstance(v, zarr.Array) ) if len(array_names) > 
self.frontend.level: self._rdr = self._root[self.frontend.level] @@ -1748,8 +1752,7 @@ def _init_writer(self): # Check for existing arrays when appending if self.frontend.append is True: existing_arrays = sorted( - k for k, v in self._root.members() - if isinstance(v, zarr.Array) + k for k, v in self._root.members() if isinstance(v, zarr.Array) ) if len(existing_arrays) > 0: writer = self._root["0"] @@ -1796,12 +1799,8 @@ def __init__(self, frontend): class Zarr3Reader(bfio.base_classes.AbstractReader): def __init__(self, frontend): - raise ImportError( - "Zarr3Reader class unavailable. Could not import zarr." - ) + raise ImportError("Zarr3Reader class unavailable. Could not import zarr.") class Zarr3Writer(bfio.base_classes.AbstractWriter): def __init__(self, frontend): - raise ImportError( - "Zarr3Writer class unavailable. Could not import zarr." - ) + raise ImportError("Zarr3Writer class unavailable. Could not import zarr.") diff --git a/src/bfio/bfio.py b/src/bfio/bfio.py index 9d0ec40..2551b1f 100644 --- a/src/bfio/bfio.py +++ b/src/bfio/bfio.py @@ -23,12 +23,13 @@ class BioReader(BioBase): any Bio-Formats supported file format, but is specially optimized for handling the OME tiled tiff format. - There are three backends: ``bioformats``, ``python``, and ``zarr``. The - ``bioformats`` backend directly uses Bio-Formats for file reading, and can read any - forma that is supported by Bio-Formats. The ``python`` backend will only read - images in OME Tiff format with tile tags set to 1024x1024, and is - significantly faster than the "bioformats" backend for reading these types of tiff - files. The ``zarr`` backend will only read OME Zarr files. + There are five backends: ``bioformats``, ``python``, ``zarr``, ``zarr3``, and + ``tensorstore``. The ``bioformats`` backend directly uses Bio-Formats for file + reading, and can read any format that is supported by Bio-Formats. 
+ The ``python`` backend will only read images in OME Tiff format with tile tags + set to 1024x1024, and is significantly faster than the "bioformats" backend for + reading these types of tiff files. The ``zarr`` backend will only read OME Zarr + files. File reading and writing are multi-threaded by default, except for the ``bioformats`` backend which does not currently support threading. Half of the @@ -136,7 +137,8 @@ def __init__( ) else: raise ValueError( - 'backend must be "python", "bioformats", "tensorstore", "zarr", or "zarr3"' + 'backend must be "python", "bioformats", "tensorstore", "zarr", or ' + '"zarr3"' ) self.logger.debug("Finished initializing the backend.") @@ -1171,7 +1173,8 @@ class if specified. *Defaults to None.* ) else: raise ValueError( - 'backend must be "python", "bioformats", "tensorstore", "zarr", or "zarr3"' + 'backend must be "python", "bioformats", "tensorstore", "zarr", or ' + '"zarr3"' ) if not self._file_path.name.endswith( diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index 51b4e98..f807dd6 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -55,7 +55,7 @@ def __init__(self, frontend): ) else: self._file_path, self._axes_list = self.get_zarr_array_info() - self._file_type = FileType.OmeZarr + self._file_type = FileType.OmeZarrV2 self._rdr = TSReader(self._file_path, self._file_type, self._axes_list) self.X = self._rdr._X @@ -68,6 +68,7 @@ def __init__(self, frontend): def _list_zarr_children(self, path, child_type="array"): """Filesystem-based fallback for enumerating zarr v2 store children.""" from pathlib import Path as _Path + p = _Path(path) marker = ".zarray" if child_type == "array" else ".zgroup" children = [] @@ -184,7 +185,7 @@ def read_metadata(self): self.logger.debug("read_metadata(): Reading metadata...") if self._file_type == FileType.OmeTiff: return self.read_tiff_metadata() - if self._file_type == FileType.OmeZarr: + if self._file_type == FileType.OmeZarrV2: return
self.read_zarr_metadata() def read_image(self, X, Y, Z, C, T): diff --git a/tests/test_read.py b/tests/test_read.py index 94e427e..0ac1fd1 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -11,6 +11,7 @@ from ome_zarr.utils import download as zarr_download TEST_IMAGES = { + "ExpA_VIP_ASLM_on.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0066/ExpA_VIP_ASLM_on.zarr", "5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr", "Plate1-Blue-A-12-Scene-3-P3-F2-03.czi": "https://downloads.openmicroscopy.org/images/Zeiss-CZI/idr0011/Plate1-Blue-A_TS-Stinger/Plate1-Blue-A-12-Scene-3-P3-F2-03.czi", "0.tif": "https://osf.io/j6aer/download", @@ -129,7 +130,7 @@ def test_read_tif_strip_auto(self): def test_read_zarr_auto(self): """test_read_zarr_auto - Read ome zarr, should load zarr backend""" with bfio.BioReader(TEST_DIR.joinpath("4d_array.zarr")) as br: - self.assertEqual(br._backend_name, "zarr") + self.assertEqual(br._backend_name, "zarr3") I = br[:] diff --git a/tests/test_zarr_v3.py b/tests/test_zarr_v3.py index d70e68e..7a6fc70 100644 --- a/tests/test_zarr_v3.py +++ b/tests/test_zarr_v3.py @@ -73,9 +73,7 @@ def test_write_v3_creates_zarr_json(self): out_path = Path(tmp) / "output.zarr" data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) - bw = BioWriter( - out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8) bw[:128, :128, 0, 0, 0] = data bw.close() @@ -92,9 +90,7 @@ def test_write_v3_ome_metadata(self): out_path = Path(tmp) / "output.zarr" data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) - bw = BioWriter( - out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8) bw[:128, :128, 0, 0, 0] = data bw.close() @@ -113,9 +109,7 @@ def test_read_v3_data(self): out_path = Path(tmp) / "output.zarr" data = numpy.random.randint(0, 
255, (128, 128), dtype=numpy.uint8) - bw = BioWriter( - out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8) bw[:128, :128, 0, 0, 0] = data bw.close() @@ -165,9 +159,7 @@ def test_auto_detect_v2(self): out_path = Path(tmp) / "output_v2.zarr" data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) - bw = BioWriter( - out_path, backend="zarr", X=128, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr", X=128, Y=128, dtype=numpy.uint8) bw[:128, :128, 0, 0, 0] = data bw.close() @@ -183,9 +175,7 @@ def test_auto_detect_v3(self): out_path = Path(tmp) / "output_v3.zarr" data = numpy.random.randint(0, 255, (128, 128), dtype=numpy.uint8) - bw = BioWriter( - out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr3", X=128, Y=128, dtype=numpy.uint8) bw[:128, :128, 0, 0, 0] = data bw.close() @@ -205,9 +195,7 @@ def test_roundtrip_v2(self): out_path = Path(tmp) / "roundtrip_zarr.zarr" data = numpy.random.randint(0, 65535, (64, 64), dtype=numpy.uint16) - bw = BioWriter( - out_path, backend="zarr", X=64, Y=64, dtype=numpy.uint16 - ) + bw = BioWriter(out_path, backend="zarr", X=64, Y=64, dtype=numpy.uint16) bw[:64, :64, 0, 0, 0] = data bw.close() @@ -225,9 +213,7 @@ def test_roundtrip_v3(self): out_path = Path(tmp) / "roundtrip_zarr3.zarr" data = numpy.random.randint(0, 65535, (64, 64), dtype=numpy.uint16) - bw = BioWriter( - out_path, backend="zarr3", X=64, Y=64, dtype=numpy.uint16 - ) + bw = BioWriter(out_path, backend="zarr3", X=64, Y=64, dtype=numpy.uint16) bw[:64, :64, 0, 0, 0] = data bw.close() @@ -247,9 +233,7 @@ def test_image_size_v3(self): with tempfile.TemporaryDirectory() as tmp: out_path = Path(tmp) / "size_test.zarr" - bw = BioWriter( - out_path, backend="zarr3", X=256, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr3", X=256, Y=128, dtype=numpy.uint8) data = numpy.zeros((128, 256, 
1, 1, 1), dtype=numpy.uint8) bw.write(data) bw.close() @@ -264,9 +248,7 @@ def test_image_size_v2(self): with tempfile.TemporaryDirectory() as tmp: out_path = Path(tmp) / "size_test_v2.zarr" - bw = BioWriter( - out_path, backend="zarr", X=256, Y=128, dtype=numpy.uint8 - ) + bw = BioWriter(out_path, backend="zarr", X=256, Y=128, dtype=numpy.uint8) data = numpy.zeros((128, 256, 1, 1, 1), dtype=numpy.uint8) bw.write(data) bw.close() From 17829157b39ea4c42a4d6e4cc389214223b53413 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Wed, 11 Feb 2026 14:53:41 -0500 Subject: [PATCH 3/5] fix formatting and update test --- src/bfio/backends.py | 1 - src/bfio/ts_backends.py | 1 - tests/test_read.py | 1 - 3 files changed, 3 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 9df8ef5..adfdb10 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -16,7 +16,6 @@ from tifffile import tifffile from xml.etree import ElementTree as ET - # bfio internals from bfio import __version__ as version import bfio.base_classes diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index f807dd6..8d748dd 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -6,7 +6,6 @@ from typing import Dict import shutil - # Third party packages import ome_types from xml.etree import ElementTree as ET diff --git a/tests/test_read.py b/tests/test_read.py index 0ac1fd1..fc2d3fa 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -11,7 +11,6 @@ from ome_zarr.utils import download as zarr_download TEST_IMAGES = { - "ExpA_VIP_ASLM_on.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0066/ExpA_VIP_ASLM_on.zarr", "5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr", "Plate1-Blue-A-12-Scene-3-P3-F2-03.czi": "https://downloads.openmicroscopy.org/images/Zeiss-CZI/idr0011/Plate1-Blue-A_TS-Stinger/Plate1-Blue-A-12-Scene-3-P3-F2-03.czi", "0.tif": "https://osf.io/j6aer/download", From 
392b3c67dba4ba91e3e026625fb5f14125098071 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Wed, 11 Feb 2026 15:38:39 -0500 Subject: [PATCH 4/5] create zarr2 files --- tests/test_read.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_read.py b/tests/test_read.py index fc2d3fa..3cd8682 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -61,6 +61,7 @@ def setUpModule(): shape=(1, br.C, br.Z, br.Y, br.X), dtype=br.dtype, chunks=(1, 1, 1, 1024, 1024), + zarr_format=2, ) for t in range(1): for c in range(br.C): @@ -129,7 +130,7 @@ def test_read_tif_strip_auto(self): def test_read_zarr_auto(self): """test_read_zarr_auto - Read ome zarr, should load zarr backend""" with bfio.BioReader(TEST_DIR.joinpath("4d_array.zarr")) as br: - self.assertEqual(br._backend_name, "zarr3") + self.assertEqual(br._backend_name, "zarr") I = br[:] From d2311fa029c181fcfa8004f5c8ed36ce6118d3a9 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Wed, 11 Feb 2026 15:52:41 -0500 Subject: [PATCH 5/5] fix: Convert integer level to string for zarr v3 Group key access zarr-python v3's Group.__getitem__ requires string keys, not integers. This fixes TypeError when using level parameter with BioReader. 
Co-Authored-By: Claude Opus 4.6 --- src/bfio/backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index adfdb10..9c7b227 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -1285,7 +1285,7 @@ def __init__(self, frontend): elif isinstance(self._root, zarr.Group): array_keys = _list_zarr_children(store_path, "array") if len(array_keys) > self.frontend.level: - self._rdr = self._root[self.frontend.level] + self._rdr = self._root[str(self.frontend.level)] else: raise ValueError( "The zarr file does not contain resolution " @@ -1646,7 +1646,7 @@ def __init__(self, frontend): k for k, v in self._root.members() if isinstance(v, zarr.Array) ) if len(array_names) > self.frontend.level: - self._rdr = self._root[self.frontend.level] + self._rdr = self._root[str(self.frontend.level)] else: raise ValueError( "The zarr file does not contain resolution "