From 44433eca2d2c53f893bfd57abfba9bb76ab46ed7 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 10 Jan 2026 21:52:55 +0100 Subject: [PATCH 01/15] tests: add copy test for fs with key/prefix collision --- upath/tests/implementations/test_s3.py | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index d4027fdc..0044c7be 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -1,6 +1,7 @@ """see upath/tests/conftest.py for fixtures""" import sys +import warnings import fsspec import pytest @@ -165,3 +166,50 @@ def test_pathlib_consistent_join(): b1 = UPath("s3://mybucket/withkey").joinpath("subfolder/myfile.txt") assert b0 == b1 assert "s3://mybucket/withkey/subfolder/myfile.txt" == str(b0) == str(b1) + + +def test_copy__object_key_collides_with_dir_prefix(s3_server, tmp_path): + anon, s3so = s3_server + s3so["use_listings_cache"] = False + + s3 = fsspec.filesystem("s3", anon=False, **s3so) + bucket = "copy_into_collision_bucket" + s3.mkdir(bucket + "/src" + "/common_prefix/") + # object under common prefix as key + s3.pipe_file(f"{bucket}/src/common_prefix", b"hello world") + # store more objects with same prefix + s3.pipe_file(f"{bucket}/src/common_prefix/file1.txt", b"1") + s3.pipe_file(f"{bucket}/src/common_prefix/file2.txt", b"2") + + # make sure the sources have a collision + assert s3.isdir(f"{bucket}/src/common_prefix") + assert s3.isfile(f"{bucket}/src/common_prefix") + assert s3.isfile(f"{bucket}/src/common_prefix/file1.txt") + assert s3.isfile(f"{bucket}/src/common_prefix/file2.txt") + # prepare source and destination + src = UPath(f"s3://{bucket}/src", anon=anon) + dst = UPath(tmp_path) + + def on_collision_rename_file(src, dst): + warnings.warn( + f"{src!s} collides with prefix. 
Renaming target file object to {dst!s}", + UserWarning, + stacklevel=3, + ) + return ( + dst.with_suffix(dst.suffix + ".COLLISION"), + dst, + ) + + # perform copy + src.copy_into(dst, on_collision=on_collision_rename_file) + + # check results + dst_files = sorted(str(x.relative_to(tmp_path)) for x in dst.glob("**/*")) + assert dst_files == [ + "src", + "src/common_prefix", + "src/common_prefix.COLLISION", + "src/common_prefix/file1.txt", + "src/common_prefix/file2.txt", + ] From 44cbbfb24369c9ccc3662ace447d5df531c2ea2c Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 10 Jan 2026 22:30:08 +0100 Subject: [PATCH 02/15] upath.core: implement support for on_name_collision in UPath._copy_from --- upath/core.py | 42 ++++++++++++++++++++++++++++++++++++++++- upath/types/__init__.py | 7 +++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/upath/core.py b/upath/core.py index d572a4c9..d4c08a46 100644 --- a/upath/core.py +++ b/upath/core.py @@ -41,6 +41,7 @@ from upath.registry import get_upath_class from upath.types import UNSET_DEFAULT from upath.types import JoinablePathLike +from upath.types import OnNameCollisionFunc from upath.types import PathInfo from upath.types import ReadablePath from upath.types import ReadablePathLike @@ -1305,9 +1306,48 @@ def _copy_from( self, source: ReadablePath, follow_symlinks: bool = True, + on_name_collision: OnNameCollisionFunc | None = None, **kwargs: Any, ) -> None: - return super()._copy_from(source, follow_symlinks) + """ + UPath custom:: Recursively copy the given path to this path. 
+ """ + # fixme: it would be best if this would be upstreamed + from pathlib_abc import vfsopen + from pathlib_abc import vfspath + from pathlib_abc._os import copyfileobj + from pathlib_abc._os import ensure_different_files + + stack: list[tuple[ReadablePath, WritablePath]] = [(source, self)] + while stack: + src, dst = stack.pop() + info = src.info + if not follow_symlinks and info.is_symlink(): + dst.symlink_to(vfspath(src.readlink()), src.info.is_dir()) + elif on_name_collision and info.is_file() and info.is_dir(): + dst_file, dst_dir = on_name_collision(src, dst) + if dst_file is not None: + ensure_different_files(src, dst_file) + with vfsopen(src, "rb") as source_f: + with vfsopen(dst_file, "wb") as target_f: + copyfileobj(source_f, target_f) + if dst_dir is not None: + children = src.iterdir() + dst_dir.mkdir() + # feed through dict.fromkeys to remove duplicates + for child in dict.fromkeys(children): + stack.append((child, dst_dir.joinpath(child.name))) + elif info.is_dir(): + children = src.iterdir() + dst.mkdir() + # feed through dict.fromkeys to remove duplicates + for child in dict.fromkeys(children): + stack.append((child, dst.joinpath(child.name))) + else: + ensure_different_files(src, dst) + with vfsopen(src, "rb") as source_f: + with vfsopen(dst, "wb") as target_f: + copyfileobj(source_f, target_f) # --- WritablePath attributes ------------------------------------- diff --git a/upath/types/__init__.py b/upath/types/__init__.py index f0c6c965..a1e3730b 100644 --- a/upath/types/__init__.py +++ b/upath/types/__init__.py @@ -2,6 +2,7 @@ import enum import sys +from collections.abc import Callable from os import PathLike from typing import TYPE_CHECKING from typing import Any @@ -35,6 +36,7 @@ "PathParser", "UPathParser", "UNSET_DEFAULT", + "OnIsFileAndDirHandler", ] @@ -124,3 +126,8 @@ def isabs(self, path: JoinablePathLike) -> bool: ... def splitdrive(self, path: JoinablePathLike) -> tuple[str, str]: ... 
def splitroot(self, path: JoinablePathLike) -> tuple[str, str, str]: ... + + +OnNameCollisionFunc: TypeAlias = Callable[ + [ReadablePath, WritablePath], tuple[WritablePath | None, WritablePath | None] +] From de02f0331124acb5d230edaafd596168e7b88c12 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 10:59:42 +0100 Subject: [PATCH 03/15] upath.implementations.local: fix PosixUPath/WindowsUPath _copy_from --- upath/implementations/local.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/upath/implementations/local.py b/upath/implementations/local.py index 3bf3ec76..62f9d472 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -364,6 +364,11 @@ def rmdir(self, recursive: bool = UNSET_DEFAULT) -> None: else: shutil.rmtree(self) + # we need to override pathlib.Path._copy_from to support it as a + # WritablePath._copy_from target with support for on_name_collision + # Issue: https://github.com/barneygale/pathlib-abc/issues/48 + _copy_from = UPath._copy_from + if sys.version_info < (3, 14): # noqa: C901 @overload @@ -720,17 +725,6 @@ def chmod( ) return super().chmod(mode) - if not hasattr(pathlib.Path, "_copy_from"): - - def _copy_from( - self, - source: ReadablePath | LocalPath, - follow_symlinks: bool = True, - preserve_metadata: bool = False, - ) -> None: - _copy_from: Any = WritablePath._copy_from.__get__(self) - _copy_from(source, follow_symlinks=follow_symlinks) - UPath.register(LocalPath) From 2e1aca9c4cfbaccf4c30813c0a34c3bf0e8e19d5 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 11:41:31 +0100 Subject: [PATCH 04/15] upath.implementations.cloud: fix _copy_from for S3Path --- upath/implementations/cloud.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index 7814a456..d04b6ae1 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ 
-11,6 +11,8 @@ from upath._flavour import upath_strip_protocol from upath.core import UPath from upath.types import JoinablePathLike +from upath.types import OnNameCollisionFunc +from upath.types import ReadablePath if TYPE_CHECKING: from typing import Literal @@ -147,6 +149,31 @@ def __init__( if not self.drive and len(self.parts) > 1: raise ValueError("non key-like path provided (bucket/container missing)") + def _copy_from( + self, + source: ReadablePath, + follow_symlinks: bool = True, + on_name_collision: OnNameCollisionFunc | None = None, + **kwargs: Any, + ) -> None: + # to allow _copy_from to check if a path isfile AND isdir + # we need to disable s3fs's dircache mechanism because it + # currently implements a XOR relation the two for objects + # ref: fsspec/s3fs#999 + sopts = dict(self.storage_options) + sopts["use_listings_cache"] = False + new_self = type(self)( + self.path, + protocol=self.protocol, # type: ignore + **sopts, + ) + new_self._copy_from( + source, + follow_symlinks=follow_symlinks, + on_name_collision=on_name_collision, + **kwargs, + ) + class AzurePath(CloudPath): __slots__ = () From 57357b55113b00b3af346415b4f1d650c99f74a2 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 12:47:45 +0100 Subject: [PATCH 05/15] tests: fix s3 to local name collision test --- upath/tests/implementations/test_s3.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index 0044c7be..63cd5289 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -170,9 +170,8 @@ def test_pathlib_consistent_join(): def test_copy__object_key_collides_with_dir_prefix(s3_server, tmp_path): anon, s3so = s3_server - s3so["use_listings_cache"] = False - s3 = fsspec.filesystem("s3", anon=False, **s3so) + s3 = fsspec.filesystem("s3", anon=anon, **{**s3so, "use_listings_cache": False}) bucket = 
"copy_into_collision_bucket" s3.mkdir(bucket + "/src" + "/common_prefix/") # object under common prefix as key @@ -187,7 +186,7 @@ def test_copy__object_key_collides_with_dir_prefix(s3_server, tmp_path): assert s3.isfile(f"{bucket}/src/common_prefix/file1.txt") assert s3.isfile(f"{bucket}/src/common_prefix/file2.txt") # prepare source and destination - src = UPath(f"s3://{bucket}/src", anon=anon) + src = UPath(f"s3://{bucket}/src", anon=anon, **s3so) dst = UPath(tmp_path) def on_collision_rename_file(src, dst): @@ -202,7 +201,7 @@ def on_collision_rename_file(src, dst): ) # perform copy - src.copy_into(dst, on_collision=on_collision_rename_file) + src.copy_into(dst, on_name_collision=on_collision_rename_file) # check results dst_files = sorted(str(x.relative_to(tmp_path)) for x in dst.glob("**/*")) From 1690e76e1f4afab0b3716af72b4446076fb2b00a Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 12:49:15 +0100 Subject: [PATCH 06/15] upath.implementations.cloud: fix S3Path.copy --- upath/implementations/cloud.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index d04b6ae1..ab20f807 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -5,6 +5,7 @@ from collections.abc import Sequence from typing import TYPE_CHECKING from typing import Any +from typing import overload from upath import UnsupportedOperation from upath._chain import DEFAULT_CHAIN_PARSER @@ -13,9 +14,12 @@ from upath.types import JoinablePathLike from upath.types import OnNameCollisionFunc from upath.types import ReadablePath +from upath.types import SupportsPathLike +from upath.types import WritablePath if TYPE_CHECKING: from typing import Literal + from typing import TypeVar if sys.version_info >= (3, 11): from typing import Self @@ -30,6 +34,8 @@ from upath.types.storage_options import HfStorageOptions from upath.types.storage_options import 
S3StorageOptions + _WT = TypeVar("_WT", bound="WritablePath") + __all__ = [ "CloudPath", "GCSPath", @@ -174,6 +180,29 @@ def _copy_from( **kwargs, ) + @overload + def copy(self, target: _WT, **kwargs: Any) -> _WT: ... + + @overload + def copy(self, target: SupportsPathLike | str, **kwargs: Any) -> Self: ... + + def copy(self, target: _WT | SupportsPathLike | str, **kwargs: Any) -> _WT | UPath: + """ + Recursively copy this file or directory tree to the given destination. + """ + # to allow _copy_from to check if a path isfile AND isdir + # we need to disable s3fs's dircache mechanism because it + # currently implements a XOR relation the two for objects + # ref: fsspec/s3fs#999 + sopts = dict(self.storage_options) + sopts["use_listings_cache"] = False + new_self = type(self)( + self.path, + protocol=self.protocol, # type: ignore + **sopts, + ) + return super(type(self), new_self).copy(target, **kwargs) + class AzurePath(CloudPath): __slots__ = () From 95e5cf842d900e34a1fc5b1f0c5b73800567931b Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 12:51:30 +0100 Subject: [PATCH 07/15] upath.implementations.cloud: S3Path._copy_from doesn't need the fix --- upath/implementations/cloud.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index ab20f807..d19d0380 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -12,8 +12,6 @@ from upath._flavour import upath_strip_protocol from upath.core import UPath from upath.types import JoinablePathLike -from upath.types import OnNameCollisionFunc -from upath.types import ReadablePath from upath.types import SupportsPathLike from upath.types import WritablePath @@ -155,31 +153,6 @@ def __init__( if not self.drive and len(self.parts) > 1: raise ValueError("non key-like path provided (bucket/container missing)") - def _copy_from( - self, - source: ReadablePath, - follow_symlinks: bool = True, 
- on_name_collision: OnNameCollisionFunc | None = None, - **kwargs: Any, - ) -> None: - # to allow _copy_from to check if a path isfile AND isdir - # we need to disable s3fs's dircache mechanism because it - # currently implements a XOR relation the two for objects - # ref: fsspec/s3fs#999 - sopts = dict(self.storage_options) - sopts["use_listings_cache"] = False - new_self = type(self)( - self.path, - protocol=self.protocol, # type: ignore - **sopts, - ) - new_self._copy_from( - source, - follow_symlinks=follow_symlinks, - on_name_collision=on_name_collision, - **kwargs, - ) - @overload def copy(self, target: _WT, **kwargs: Any) -> _WT: ... From 2ca57d468c642ceb7a7bf301d94bc9861d252daa Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 12:52:27 +0100 Subject: [PATCH 08/15] upath.implementations.cloud: S3Path.copy ensure type in super --- upath/implementations/cloud.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index d19d0380..0f76b4c1 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -174,7 +174,8 @@ def copy(self, target: _WT | SupportsPathLike | str, **kwargs: Any) -> _WT | UPa protocol=self.protocol, # type: ignore **sopts, ) - return super(type(self), new_self).copy(target, **kwargs) + assert type(self) is type(new_self) + return super(type(new_self), new_self).copy(target, **kwargs) class AzurePath(CloudPath): From 627de46769848f29a4851c256621e9dfcbff54e5 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 12:58:29 +0100 Subject: [PATCH 09/15] upath.types: fix type error on py3.9 --- upath/types/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/upath/types/__init__.py b/upath/types/__init__.py index a1e3730b..77182777 100644 --- a/upath/types/__init__.py +++ b/upath/types/__init__.py @@ -6,6 +6,7 @@ from os import PathLike from typing import TYPE_CHECKING from typing 
import Any +from typing import Optional from typing import Protocol from typing import Union from typing import runtime_checkable @@ -129,5 +130,5 @@ def splitroot(self, path: JoinablePathLike) -> tuple[str, str, str]: ... OnNameCollisionFunc: TypeAlias = Callable[ - [ReadablePath, WritablePath], tuple[WritablePath | None, WritablePath | None] + [ReadablePath, WritablePath], tuple[Optional[WritablePath], Optional[WritablePath]] ] From 54cd04c217fb79d6fc22577e685ce1374209b604 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 13:01:23 +0100 Subject: [PATCH 10/15] tests: fix windows test path comparison --- upath/tests/implementations/test_s3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index 63cd5289..a7fcc5ad 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -13,6 +13,7 @@ from ..utils import OverrideMeta from ..utils import extends_base from ..utils import overrides_base +from ..utils import posixify def silence_botocore_datetime_deprecation(cls): @@ -204,7 +205,7 @@ def on_collision_rename_file(src, dst): src.copy_into(dst, on_name_collision=on_collision_rename_file) # check results - dst_files = sorted(str(x.relative_to(tmp_path)) for x in dst.glob("**/*")) + dst_files = sorted(posixify(x.relative_to(tmp_path)) for x in dst.glob("**/*")) assert dst_files == [ "src", "src/common_prefix", From b75e08323ccc46b9138740f7a4627b18fa3c0187 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 13:16:38 +0100 Subject: [PATCH 11/15] upath.types: correct __all__ --- upath/types/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upath/types/__init__.py b/upath/types/__init__.py index 77182777..0bb07878 100644 --- a/upath/types/__init__.py +++ b/upath/types/__init__.py @@ -37,7 +37,7 @@ "PathParser", "UPathParser", "UNSET_DEFAULT", -
"OnIsFileAndDirHandler", + "OnNameCollisionFunc", ] From 6fb606294c9be82fc965ba37a06d5bf81762c473 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 13:26:35 +0100 Subject: [PATCH 12/15] tests: skip azurite version check for now --- upath/tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/upath/tests/conftest.py b/upath/tests/conftest.py index c9333c6f..a8ffa83b 100644 --- a/upath/tests/conftest.py +++ b/upath/tests/conftest.py @@ -385,8 +385,8 @@ def docker_azurite(azurite_credentials): image = "mcr.microsoft.com/azure-storage/azurite" container_name = "azure_test" cmd = ( - f"docker run --rm -d -p {AZURITE_PORT}:10000 --name {container_name} {image}" # noqa: E501 - " azurite-blob --loose --blobHost 0.0.0.0" # noqa: E501 + f"docker run --rm -d -p {AZURITE_PORT}:10000 --name {container_name} {image}:latest" # noqa: E501 + " azurite-blob --loose --blobHost 0.0.0.0 --skipApiVersionCheck" # noqa: E501 ) url = f"http://localhost:{AZURITE_PORT}" From 1f3a93fc3d0864ef2d2e62669fa2d8a9728d9b2e Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 17:52:36 +0100 Subject: [PATCH 13/15] tests: switch gcs fixtures to memory backend to support file/dir collisions --- upath/tests/conftest.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/upath/tests/conftest.py b/upath/tests/conftest.py index a8ffa83b..4fdeae62 100644 --- a/upath/tests/conftest.py +++ b/upath/tests/conftest.py @@ -238,9 +238,19 @@ def docker_gcs(): pytest.skip("docker not installed") container = "gcsfs_test" - cmd = ( - "docker run -d -p 4443:4443 --name gcsfs_test fsouza/fake-gcs-server:latest -scheme " # noqa: E501 - "http -public-host http://localhost:4443 -external-url http://localhost:4443" # noqa: E501 + cmd = " ".join( + [ + "docker", + "run", + "-d", + "-p 4443:4443", + "--name gcsfs_test", + "fsouza/fake-gcs-server:latest", + "-scheme http", + "-public-host http://localhost:4443", + 
"-external-url http://localhost:4443", + "-backend memory", + ] ) stop_docker(container) subprocess.check_output(shlex.split(cmd)) From aef48e5bc553572868c14c6afbbc77eed6451564 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 22:27:24 +0100 Subject: [PATCH 14/15] tests: add GCSPath test for copy_into (xfail) --- upath/tests/implementations/test_gcs.py | 56 +++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/upath/tests/implementations/test_gcs.py b/upath/tests/implementations/test_gcs.py index 41d16c63..c7f13a82 100644 --- a/upath/tests/implementations/test_gcs.py +++ b/upath/tests/implementations/test_gcs.py @@ -1,3 +1,5 @@ +import warnings + import fsspec import pytest @@ -8,6 +10,7 @@ from ..utils import OverrideMeta from ..utils import extends_base from ..utils import overrides_base +from ..utils import posixify from ..utils import skip_on_windows @@ -49,3 +52,56 @@ def test_mkdir_in_empty_bucket(docker_gcs): endpoint_url=docker_gcs, token="anon", ).parent.mkdir(parents=True, exist_ok=True) + + +@skip_on_windows +@pytest.mark.xfail(reason="gcsfs returns isdir false") +def test_copy__object_key_collides_with_dir_prefix(docker_gcs, tmp_path): + gcs = fsspec.filesystem( + "gcs", + endpoint_url=docker_gcs, + token="anon", + use_listings_cache=False, + ) + bucket = "copy_into_collision_bucket" + gcs.mkdir(bucket) + # gcs.mkdir(bucket + "/src" + "/common_prefix/") + # object under common prefix as key + gcs.pipe_file(f"{bucket}/src/common_prefix", b"hello world") + # store more objects with same prefix + gcs.pipe_file(f"{bucket}/src/common_prefix/file1.txt", b"1") + gcs.pipe_file(f"{bucket}/src/common_prefix/file2.txt", b"2") + gcs.invalidate_cache() + + # make sure the sources have a collision + assert gcs.isfile(f"{bucket}/src/common_prefix") + assert gcs.isdir(f"{bucket}/src/common_prefix") # BROKEN in gcsfs + assert gcs.isfile(f"{bucket}/src/common_prefix/file1.txt") + assert 
gcs.isfile(f"{bucket}/src/common_prefix/file2.txt") + # prepare source and destination + src = UPath(f"gs://{bucket}/src", endpoint_url=docker_gcs, token="anon") + dst = UPath(tmp_path) + + def on_collision_rename_file(src, dst): + warnings.warn( + f"{src!s} collides with prefix. Renaming target file object to {dst!s}", + UserWarning, + stacklevel=3, + ) + return ( + dst.with_suffix(dst.suffix + ".COLLISION"), + dst, + ) + + # perform copy + src.copy_into(dst, on_name_collision=on_collision_rename_file) + + # check results + dst_files = sorted(posixify(x.relative_to(tmp_path)) for x in dst.glob("**/*")) + assert dst_files == [ + "src", + "src/common_prefix", + "src/common_prefix.COLLISION", + "src/common_prefix/file1.txt", + "src/common_prefix/file2.txt", + ] From e5bfe871c9d81255121a694322b8ff680952006b Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 11 Jan 2026 22:34:08 +0100 Subject: [PATCH 15/15] tests: add AzurePath test for copy_into (xfail) --- upath/tests/implementations/test_azure.py | 57 +++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/upath/tests/implementations/test_azure.py b/upath/tests/implementations/test_azure.py index eb82e930..ee8ed625 100644 --- a/upath/tests/implementations/test_azure.py +++ b/upath/tests/implementations/test_azure.py @@ -1,3 +1,6 @@ +import warnings + +import fsspec import pytest from upath import UPath @@ -7,6 +10,7 @@ from ..utils import OverrideMeta from ..utils import extends_base from ..utils import overrides_base +from ..utils import posixify from ..utils import skip_on_windows @@ -61,3 +65,56 @@ def test_broken_mkdir(self): (path / "file").write_text("foo") assert path.exists() + + +@skip_on_windows +@pytest.mark.xfail(reason="adlfs returns isdir false") +def test_copy__object_key_collides_with_dir_prefix(azurite_credentials, tmp_path): + account_name, connection_string = azurite_credentials + storage_options = { + "account_name": account_name, + "connection_string": connection_string, + 
} + + az = fsspec.filesystem("az", **storage_options, use_listings_cache=False) + container = "copy-into-collision-container" + az.mkdir(container) + # store more objects with same prefix + az.pipe_file(f"{container}/src/common_prefix/file1.txt", b"1") + az.pipe_file(f"{container}/src/common_prefix/file2.txt", b"2") + # object under common prefix as key + az.pipe_file(f"{container}/src/common_prefix", b"hello world") + az.invalidate_cache() + + # make sure the sources have a collision + assert az.isfile(f"{container}/src/common_prefix") + assert az.isdir(f"{container}/src/common_prefix") + assert az.isfile(f"{container}/src/common_prefix/file1.txt") + assert az.isfile(f"{container}/src/common_prefix/file2.txt") + # prepare source and destination + src = UPath(f"az://{container}/src", **storage_options) + dst = UPath(tmp_path) + + def on_collision_rename_file(src, dst): + warnings.warn( + f"{src!s} collides with prefix. Renaming target file object to {dst!s}", + UserWarning, + stacklevel=3, + ) + return ( + dst.with_suffix(dst.suffix + ".COLLISION"), + dst, + ) + + # perform copy + src.copy_into(dst, on_name_collision=on_collision_rename_file) + + # check results + dst_files = sorted(posixify(x.relative_to(tmp_path)) for x in dst.glob("**/*")) + assert dst_files == [ + "src", + "src/common_prefix", + "src/common_prefix.COLLISION", + "src/common_prefix/file1.txt", + "src/common_prefix/file2.txt", + ]