diff --git a/docs/release-notes.md b/docs/release-notes.md index c3449ba..e89cbc8 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -4,6 +4,7 @@ ### Bug Fixes - Fix a regression introduced in 0.2.6 where `is_monotonic` was significantly degrading `.sel` performance. +- Fix `xdas.concat` to gracefully handle empty inputs, preventing errors when selecting out-of-range data from a `DataCollection` (@atrabattoni). ### Documentation - Achieved **100% docstring coverage** (excluding `__magic__` and private `_methods`) (@atrabattoni). diff --git a/tests/test_datacollection.py b/tests/test_datacollection.py index d710743..65d6d5f 100644 --- a/tests/test_datacollection.py +++ b/tests/test_datacollection.py @@ -409,3 +409,37 @@ def test_sequence_map_invalid_item(self): atom = xs.decimate(..., 2, ftype="fir") with pytest.raises(TypeError, match="encountered in the collection"): ds.map(atom) + + def test_mapping_sel_one_element_becomes_empty(self): + da = wavelet_wavefronts() + da_near = da.sel(distance=slice(0, 4999)) + da_far = da.sel(distance=slice(5000, 10000)) + dc = xd.DataCollection({"near": da_near, "far": da_far}, "instrument") + result = dc.sel(distance=slice(0, 2000)) + assert set(result.keys()) == {"near"} + assert not result["near"].empty + + def test_mapping_sel_all_elements_become_empty(self): + da = wavelet_wavefronts() + da_near = da.sel(distance=slice(0, 4999)) + da_far = da.sel(distance=slice(5000, 10000)) + dc = xd.DataCollection({"near": da_near, "far": da_far}, "instrument") + result = dc.sel(distance=slice(-1000, -1)) + assert len(result) == 0 + + def test_sequence_sel_one_element_becomes_empty(self): + da = wavelet_wavefronts() + da_near = da.sel(distance=slice(0, 4999)) + da_far = da.sel(distance=slice(5000, 10000)) + dc = xd.DataCollection([da_near, da_far], "instrument") + result = dc.sel(distance=slice(0, 2000)) + assert len(result) == 1 + assert not result[0].empty + + def test_sequence_sel_all_elements_become_empty(self): + da = 
wavelet_wavefronts() + da_near = da.sel(distance=slice(0, 4999)) + da_far = da.sel(distance=slice(5000, 10000)) + dc = xd.DataCollection([da_near, da_far], "instrument") + result = dc.sel(distance=slice(-1000, -1)) + assert len(result) == 0 diff --git a/tests/test_routines.py b/tests/test_routines.py index 3819019..6dc38b6 100644 --- a/tests/test_routines.py +++ b/tests/test_routines.py @@ -670,6 +670,31 @@ def test_dim_last(self): assert isinstance(result, xd.DataArray) +class TestConcatEdgeCases: + def test_empty_list_returns_dataarray(self): + result = xd.concat([]) + assert isinstance(result, xd.DataArray) + assert result.empty + + def test_all_empty_elements_returns_empty_dataarray(self): + da = xd.DataArray(np.zeros((0, 10)), dims=("time", "distance")) + result = xd.concat([da, da]) + assert isinstance(result, xd.DataArray) + assert result.empty + assert result.dims == ("time", "distance") + + def test_mixed_empty_and_nonempty_uses_nonempty(self): + t_empty = np.array([], dtype="datetime64[ns]") + da_empty = xd.DataArray(np.zeros((0,)), {"time": t_empty}) + t = np.array( + ["2000-01-01", "2000-01-02", "2000-01-03", "2000-01-04", "2000-01-05"], + dtype="datetime64[ns]", + ) + da = xd.DataArray(np.ones((5,)), {"time": t}) + result = xd.concat([da_empty, da]) + assert result.equals(da) + + class TestConcatCoordsEdgeCases: def test_tolerance_with_dense_coord_raises(self): da1 = xd.DataArray( diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index 1dc12da..4440166 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -217,7 +217,7 @@ def get_sampling_interval(self, cast=True): return delta def is_monotonic_increasing(self): - """Return ``True`` if no segment boundary exhibits a backward jump.""" + """Return ``True`` if no segment starts before the end of the previous one.""" return not self.get_split_indices("overlaps", tolerance=False).size def equals(self, other): diff --git a/xdas/coordinates/sampled.py 
b/xdas/coordinates/sampled.py index 06a9c6f..d05f159 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -264,7 +264,7 @@ def get_sampling_interval(self, cast=True): return delta def is_monotonic_increasing(self): - """Return ``True`` if no segment starts below the end of the previous one.""" + """Return ``True`` if no segment starts before the end of the previous one.""" return not self.get_split_indices("overlaps", tolerance=False).size def equals(self, other): diff --git a/xdas/core/routines.py b/xdas/core/routines.py index 32b0d4f..a058f88 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -942,10 +942,15 @@ def concat(objs, dim="first", tolerance=None, virtual=None, verbose=None): Returns ------- DataArray - The concatenated dataarray. + The concatenated dataarray. Coordinates along axes other than *dim* are + taken from the first element; no compatibility check is performed on ``objs[1:]``. If every element of *objs* is empty, the first element is returned unchanged; if *objs* itself is empty, an empty dataarray is returned. """ - objs = [da for da in objs if not da.empty] + objs = list(objs) + non_empty = [da for da in objs if not da.empty] + if not non_empty: + return objs[0] if objs else DataArray([]) + objs = non_empty if virtual is None: virtual = all(isinstance(da.data, (VirtualSource, VirtualStack)) for da in objs) @@ -959,6 +964,7 @@ def concat(objs, dim="first", tolerance=None, virtual=None, verbose=None): dims = (dim, *objs[0].dims) objs = [da.expand_dims(dim) for da in objs] + # TODO: check that objs[1:] have the same non-concat coords as objs[0] coords = objs[0].coords.drop_dims(dim) name = objs[0].name attrs = objs[0].attrs