From ab9838457db79b53d71df74c08f8d8a3ff9d9a33 Mon Sep 17 00:00:00 2001 From: "CasperWA[ai][bot]" Date: Mon, 11 May 2026 09:30:10 +0200 Subject: [PATCH 1/5] Replace pysftp with paramiko, lift paramiko ceiling to <6 (#682) pysftp 0.2.9 imports DSSKey from paramiko at module load time, but paramiko 4.0 removed DSSKey entirely. Replace the three pysftp calls in sftp.py with equivalent paramiko.SSHClient usage, remove pysftp from dependencies, and add types-paramiko to the mypy hook. --- .pre-commit-config.yaml | 1 + oteapi/strategies/download/sftp.py | 31 +++++++++++---------- pyproject.toml | 8 +----- tests/strategies/download/test_sftp.py | 37 +++++++++++++++++++------- 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f386d1f69..61aa74016 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -95,6 +95,7 @@ repos: - id: mypy exclude: ^tests/.*$ additional_dependencies: + - "types-paramiko" - "types-requests" - "pydantic>=2,<3" diff --git a/oteapi/strategies/download/sftp.py b/oteapi/strategies/download/sftp.py index da94d2089..f9c4652b7 100644 --- a/oteapi/strategies/download/sftp.py +++ b/oteapi/strategies/download/sftp.py @@ -6,7 +6,7 @@ from tempfile import NamedTemporaryFile from typing import Annotated -import pysftp +import paramiko from pydantic import Field from pydantic.dataclasses import dataclass from pydantic.networks import AnyUrl, UrlConstraints @@ -65,28 +65,31 @@ def initialize(self) -> AttrDict: def get(self) -> SFTPContent: """Download via sftp""" + url = self.download_config.downloadUrl + if url.host is None or url.path is None: + raise ValueError(f"Invalid (S)FTP URL (missing host or path): {url!r}") + cache = DataCache(self.download_config.configuration.datacache_config) if cache.config.accessKey and cache.config.accessKey in cache: key = cache.config.accessKey else: - # Setup connection options - cnopts = pysftp.CnOpts() - cnopts.hostkeys = None - - # open connection and store data locally - with pysftp.Connection( - host=self.download_config.downloadUrl.host, - username=self.download_config.downloadUrl.username, - password=self.download_config.downloadUrl.password, - port=self.download_config.downloadUrl.port, - cnopts=cnopts, - ) as sftp: + with paramiko.SSHClient() as client: + client.set_missing_host_key_policy( + paramiko.AutoAddPolicy() + ) # nosec B507 + client.connect( + hostname=url.host, + username=url.username, + password=url.password, + port=url.port or 22, + ) # Because of insane locking on Windows, we have to close # the downloaded file before adding it to the cache with NamedTemporaryFile(prefix="oteapi-sftp-", delete=False) as handle: localpath = Path(handle.name).resolve() try: - sftp.get(self.download_config.downloadUrl.path, localpath=localpath) + with client.open_sftp() as sftp: + sftp.get(url.path, str(localpath)) key = cache.add(localpath.read_bytes()) finally: localpath.unlink() diff --git a/pyproject.toml b/pyproject.toml index be49acaf6..233b76ef9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,10 +31,9 @@ dependencies = [ # Strategy dependencies "celery>=5.6.0,<6", "openpyxl>=3.1.5,<4", - "paramiko<4", # version 4+ has some breaking changes and is not yet supported by pysftp + "paramiko<6", "Pillow>=10.4.0,<13", "psycopg[binary]>=3.2.6,<4", - "pysftp~=0.2.9", "requests>=2.32.3,<3", ] @@ -129,11 +128,6 @@ addopts = "-rs --cov-report=term-missing:skip-covered --no-cov-on-fail" filterwarnings = [ # Treat all warnings as errors "error", - - # Ignore UserWarning from pysftp concerning known_hosts - # This is usually only a problem in testing environments, - # but we don't want to fail the tests locally if a known_hosts file is missing - "ignore:.*Failed to load HostKeys.*:UserWarning", ] [tool.ruff.lint] diff --git a/tests/strategies/download/test_sftp.py b/tests/strategies/download/test_sftp.py index a61706363..601fb982a 100644 --- a/tests/strategies/download/test_sftp.py +++ b/tests/strategies/download/test_sftp.py @@ -10,11 +10,8 @@ import pytest -class MockSFTPConnection: - """A mockup of pysftp.Connection, as used in SFTPStrategy.get().""" - - def __init__(self, **kwargs) -> None: - """Dummy initializer passing through any kwargs.""" +class MockSFTPClient: + """A mockup of paramiko.SFTPClient, as used in SFTPStrategy.get().""" def __enter__(self): """Entry into context manager.""" @@ -23,8 +20,8 @@ def __enter__(self): def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None: """Dummy exit from context manager.""" - def get(self, remotepath: str, localpath: Path) -> None: - """A mockup of pysftp.Connection.get() as called in SFTPStrategy.get().""" + def get(self, remotepath: str, localpath: str) -> None: + """A mockup of SFTPClient.get() as called in SFTPStrategy.get().""" from pathlib import Path, PureWindowsPath from shutil import copyfile @@ -35,15 +32,36 @@ def get(self, remotepath: str, localpath: Path) -> None: copyfile(remote_as_path, localpath) +class MockSSHClient: + """A mockup of paramiko.SSHClient, as used in SFTPStrategy.get().""" + + def __enter__(self): + """Entry into context manager.""" + return self + + def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None: + """Dummy exit from context manager.""" + + def set_missing_host_key_policy(self, _: object) -> None: + """Dummy set_missing_host_key_policy.""" + + def connect(self, **_kwargs: object) -> None: + """Dummy connect.""" + + def open_sftp(self) -> MockSFTPClient: + """Return a mock SFTP client.""" + return MockSFTPClient() + + def test_sftp(monkeypatch: pytest.MonkeyPatch, static_files: Path) -> None: """Test `sftp.py` download strategy by mocking download, and comparing data mock downloaded from a local file with data obtained from opening the file directly.""" - import pysftp + import paramiko from oteapi.datacache.datacache import DataCache from oteapi.strategies.download.sftp import SFTPStrategy - monkeypatch.setattr(pysftp, "Connection", MockSFTPConnection) + monkeypatch.setattr(paramiko, "SSHClient", MockSSHClient) sample_file = static_files / "sample_1280_853.jpeg" @@ -53,7 +71,6 @@ def test_sftp(monkeypatch: pytest.MonkeyPatch, static_files: Path) -> None: } # Call the strategy and get the datacache key - # datacache_key: str = SFTPStrategy(config).get().get("key", "") datacache_key: str = SFTPStrategy(config).get()["key"] # Retrieve the content from the datacache using the key datacache = DataCache() From 816b0c7e342b8d1fb24c47c406f93b18c8072a31 Mon Sep 17 00:00:00 2001 From: "CasperWA[ai][bot]" Date: Mon, 11 May 2026 10:08:22 +0200 Subject: [PATCH 2/5] Widen URL guard to catch empty host/path strings --- oteapi/strategies/download/sftp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oteapi/strategies/download/sftp.py b/oteapi/strategies/download/sftp.py index f9c4652b7..a2fa1cbd5 100644 --- a/oteapi/strategies/download/sftp.py +++ b/oteapi/strategies/download/sftp.py @@ -66,7 +66,7 @@ def initialize(self) -> AttrDict: def get(self) -> SFTPContent: """Download via sftp""" url = self.download_config.downloadUrl - if url.host is None or url.path is None: + if not url.host or not url.path: raise ValueError(f"Invalid (S)FTP URL (missing host or path): {url!r}") cache = DataCache(self.download_config.configuration.datacache_config) From ac682f9bac2a393bb46d4ad2d01ff1dcd2b74f80 Mon Sep 17 00:00:00 2001 From: "CasperWA[ai][bot]" Date: Mon, 11 May 2026 10:41:56 +0200 Subject: [PATCH 3/5] Avoid exposing credentials in URL validation error message --- oteapi/strategies/download/sftp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oteapi/strategies/download/sftp.py b/oteapi/strategies/download/sftp.py index a2fa1cbd5..c32092433 100644 --- a/oteapi/strategies/download/sftp.py +++ b/oteapi/strategies/download/sftp.py @@ -67,7 +67,10 @@ def get(self) -> SFTPContent: """Download via sftp""" url = self.download_config.downloadUrl if not url.host or not url.path: - raise ValueError(f"Invalid (S)FTP URL (missing host or path): {url!r}") + raise ValueError( + "Invalid (S)FTP URL (missing host or path): " + f"host={url.host!r}, path={url.path!r}" + ) cache = DataCache(self.download_config.configuration.datacache_config) if cache.config.accessKey and cache.config.accessKey in cache: From 3f8894a3cd88a3edca9449fcfe9784c950e42ada Mon Sep 17 00:00:00 2001 From: "CasperWA[ai][bot]" Date: Mon, 11 May 2026 11:02:31 +0200 Subject: [PATCH 4/5] Add .claude/ to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 890f1f0af..2d3acf80a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ data/** # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudio,vscode,emacs .docker +.claude/ ### Emacs ### # -*- mode: gitignore; -*- From 1c52c66b27bfaf20a04519a28706a545c64531aa Mon Sep 17 00:00:00 2001 From: "CasperWA[ai][bot]" Date: Mon, 11 May 2026 12:07:25 +0200 Subject: [PATCH 5/5] Add lower bound paramiko>=4 to pin past DSSKey removal --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 233b76ef9..fba17e8e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ # Strategy dependencies "celery>=5.6.0,<6", "openpyxl>=3.1.5,<4", - "paramiko<6", + "paramiko>=4,<6", "Pillow>=10.4.0,<13", "psycopg[binary]>=3.2.6,<4", "requests>=2.32.3,<3",