diff --git a/marimo/_plugins/ui/_impl/file_browser.py b/marimo/_plugins/ui/_impl/file_browser.py
index 085c86adb2d..4a9a487a922 100644
--- a/marimo/_plugins/ui/_impl/file_browser.py
+++ b/marimo/_plugins/ui/_impl/file_browser.py
@@ -1,6 +1,7 @@
 # Copyright 2026 Marimo. All rights reserved.
 from __future__ import annotations
 
+import re
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
@@ -167,6 +168,12 @@ class file_browser(
         filetypes (Sequence[str], optional): The file types to display in each
             directory; for example, filetypes=[".txt", ".csv"]. If None, all
             files are displayed. Defaults to None.
+        filter (str | re.Pattern | Callable[[Path], bool], optional): An
+            additional filter applied to files (directories are always shown
+            for navigation). Accepts a regex string or compiled pattern
+            searched within the filename, or a callable that receives the
+            file's `Path` and returns `True` to include it. Applied together
+            with `filetypes` (both must match). Defaults to None.
         selection_mode (str | Sequence[str], optional): Which kinds of entries
             the user can select. Accepts one of "file" (default), "directory",
             "all", or a list/tuple containing "file" and/or "directory".
@@ -200,6 +207,7 @@ def __init__(
         multiple: bool = True,
         restrict_navigation: bool = False,
         *,
+        filter: str | re.Pattern[str] | Callable[[Path], bool] | None = None,  # noqa: A002
         limit: int | None = None,
         label: str = "",
         on_change: Callable[[Sequence[FileBrowserFileInfo]], None]
@@ -247,6 +255,20 @@ def __init__(
         self._restrict_navigation = restrict_navigation
         self._ignore_empty_dirs = ignore_empty_dirs
 
+        if filter is None:
+            self._filter: re.Pattern[str] | Callable[[Path], bool] | None = (
+                None
+            )
+        elif isinstance(filter, str):
+            self._filter = re.compile(filter)
+        elif isinstance(filter, re.Pattern) or callable(filter):
+            self._filter = filter
+        else:
+            raise ValueError(
+                f"filter must be a string, re.Pattern, or callable, "
+                f"got {type(filter).__name__}."
+            )
+
         # Smart default limit based on path type
         if limit is None:
             if is_cloudpath(self._initial_path):
@@ -294,6 +316,28 @@ def _create_path(self, path_str: str | Path) -> Path:
         path = self._path_cls(path_str, **kwargs)
         return path
 
+    def _passes_filter(self, file: Path) -> bool:
+        """Return True if `file` passes the configured `filter`.
+
+        Centralizes filter evaluation so `_list_directory` and
+        `_has_files_recursive` stay in sync.
+
+        A regex filter is applied with `re.Pattern.search`, so it matches
+        anywhere within the filename; anchor with `^`/`$` to match the whole
+        name. A callable filter that raises an `OSError` (e.g. a broken symlink)
+        is treated as "does not match" so one bad file can't take down the
+        listing of the other files; any other exception propagates.
+        """
+        if self._filter is None:
+            return True
+        if isinstance(self._filter, re.Pattern):
+            return self._filter.search(file.name) is not None
+        try:
+            return bool(self._filter(file))  # coerce truthy/falsy results
+        except OSError as e:
+            LOGGER.debug(f"file_browser filter could not evaluate {file}: {e}")
+            return False
+
     def _has_files_recursive(
         self, directory: Path, max_depth: int = 100
     ) -> bool:
@@ -330,6 +374,9 @@ def _has_files_recursive(
                         and item.suffix.lower() not in self._filetypes
                     ):
                         continue
+                    # Apply regex or callable filter
+                    if not self._passes_filter(item):
+                        continue
                     return True
                 elif item.is_dir() and not item.is_symlink():
                     # Skip directory symlinks to avoid infinite loops
@@ -386,6 +433,10 @@ def _list_directory(
                 if extension.lower() not in self._filetypes:
                     continue
 
+            # Apply regex or callable filter to files
+            if not is_directory and not self._passes_filter(file):
+                continue
+
             # Skip empty directories if ignore_empty_dirs is enabled
             if self._ignore_empty_dirs and is_directory:
                 if not self._has_files_recursive(file):
diff --git a/tests/_plugins/ui/_impl/test_file_browser.py b/tests/_plugins/ui/_impl/test_file_browser.py
index 92491486941..69c6916e8c3 100644
--- a/tests/_plugins/ui/_impl/test_file_browser.py
+++ b/tests/_plugins/ui/_impl/test_file_browser.py
@@ -1232,3 +1232,141 @@ def test_list_directory_directory_only_unchanged(
         response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
         names = {f["name"] for f in response.files}
         assert names == {"sub"}
+
+
+class TestFilterParameter:
+    def test_filter_regex_string(self, tmp_path: Path) -> None:
+        """Regex string filter matches filenames."""
+        (tmp_path / "report_2024.csv").touch()
+        (tmp_path / "report_2025.csv").touch()
+        (tmp_path / "notes.txt").touch()
+
+        fb = file_browser(initial_path=tmp_path, filter=r"report_\d{4}\.csv")
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"report_2024.csv", "report_2025.csv"}
+        assert "notes.txt" not in names
+
+    def test_filter_compiled_pattern(self, tmp_path: Path) -> None:
+        """Compiled re.Pattern works the same as a string, flags included."""
+        import re as _re
+
+        (tmp_path / "train.parquet").touch()
+        (tmp_path / "TEST.PARQUET").touch()
+        (tmp_path / "readme.md").touch()
+
+        pattern = _re.compile(r"\.(parquet)$", _re.IGNORECASE)
+        fb = file_browser(initial_path=tmp_path, filter=pattern)
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"train.parquet", "TEST.PARQUET"}
+
+    def test_filter_callable(self, tmp_path: Path) -> None:
+        """Callable filter receives a Path and returns bool."""
+        (tmp_path / "big_file.bin").write_bytes(b"x" * 100)
+        (tmp_path / "small_file.bin").write_bytes(b"x" * 10)
+        (tmp_path / "tiny.txt").write_bytes(b"hi")
+
+        fb = file_browser(
+            initial_path=tmp_path,
+            filter=lambda p: p.stat().st_size >= 50,
+        )
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"big_file.bin"}
+
+    def test_filter_callable_oserror_is_isolated(self, tmp_path: Path) -> None:
+        """A callable that raises OSError on one file must not crash the listing.
+
+        The offending file is treated as "no match" and the rest of the
+        directory is still returned (e.g. a broken symlink shouldn't hide the
+        other files).
+        """
+        (tmp_path / "good.txt").touch()
+        (tmp_path / "bad.txt").touch()
+
+        def flaky(path: Path) -> bool:
+            if path.name == "bad.txt":
+                raise OSError("broken symlink")
+            return True
+
+        fb = file_browser(initial_path=tmp_path, filter=flaky)
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"good.txt"}
+
+    def test_filter_callable_non_oserror_propagates(
+        self, tmp_path: Path
+    ) -> None:
+        """A non-OSError from the filter callable propagates (not swallowed)."""
+        (tmp_path / "file.txt").touch()
+
+        def boom(_path: Path) -> bool:
+            raise ValueError("programming error")
+
+        fb = file_browser(initial_path=tmp_path, filter=boom)
+        with pytest.raises(ValueError, match="programming error"):
+            fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+
+    def test_filter_does_not_hide_directories(self, tmp_path: Path) -> None:
+        """Directories are always shown regardless of filter."""
+        sub = tmp_path / "subdir"
+        sub.mkdir()
+        (tmp_path / "file.txt").touch()
+
+        fb = file_browser(initial_path=tmp_path, filter=r"\.csv$")
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files}
+        assert "subdir" in names
+        assert "file.txt" not in names
+
+    def test_filter_and_filetypes_both_applied(self, tmp_path: Path) -> None:
+        """filter and filetypes must both match (AND semantics)."""
+        (tmp_path / "train_v1.csv").touch()
+        (tmp_path / "train_v2.csv").touch()
+        (tmp_path / "test_v1.csv").touch()
+        (tmp_path / "train_v1.txt").touch()
+
+        fb = file_browser(
+            initial_path=tmp_path,
+            filetypes=[".csv"],
+            filter=r"^train_",
+        )
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"train_v1.csv", "train_v2.csv"}
+
+    def test_filter_none_shows_all_files(self, tmp_path: Path) -> None:
+        """Default filter=None does not restrict files."""
+        (tmp_path / "a.csv").touch()
+        (tmp_path / "b.txt").touch()
+
+        fb = file_browser(initial_path=tmp_path, filter=None)
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        names = {f["name"] for f in response.files if not f["is_directory"]}
+        assert names == {"a.csv", "b.txt"}
+
+    def test_filter_invalid_type_raises(self, tmp_path: Path) -> None:
+        """Non-string/pattern/callable raises ValueError."""
+        with pytest.raises(ValueError, match="filter must be"):
+            file_browser(initial_path=tmp_path, filter=123)  # type: ignore[arg-type]
+
+    def test_filter_with_ignore_empty_dirs(self, tmp_path: Path) -> None:
+        """ignore_empty_dirs respects the filter when scanning recursively."""
+        matched_dir = tmp_path / "matched"
+        matched_dir.mkdir()
+        (matched_dir / "data.csv").touch()
+
+        unmatched_dir = tmp_path / "unmatched"
+        unmatched_dir.mkdir()
+        (unmatched_dir / "notes.txt").touch()
+
+        fb = file_browser(
+            initial_path=tmp_path,
+            filter=r"\.csv$",
+            ignore_empty_dirs=True,
+        )
+        response = fb._list_directory(ListDirectoryArgs(path=str(tmp_path)))
+        dir_names = {f["name"] for f in response.files if f["is_directory"]}
+        assert "matched" in dir_names
+        assert "unmatched" not in dir_names