From 2bd05fbda04b6fa212a0628920e2e3fe4b97eda7 Mon Sep 17 00:00:00 2001 From: Kevin Wenner Date: Sat, 27 Sep 2025 14:00:18 +0000 Subject: [PATCH 1/3] add PersistentDB class & run as nobody under root --- README.md | 30 ++++ src/tinypg/__init__.py | 3 +- src/tinypg/config.py | 56 +++++- src/tinypg/core.py | 356 ++++++++++++++++++++++++++++++++++++--- tests/test_persistent.py | 43 +++++ 5 files changed, 460 insertions(+), 28 deletions(-) create mode 100644 tests/test_persistent.py diff --git a/README.md b/README.md index 88a0660..f243302 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,36 @@ finally: db.stop() ``` +## Development + +TinyPG uses [uv](https://docs.astral.sh/uv/) to manage its virtual environment +and development tooling. To contribute to the project locally: + +1. Install uv (see the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/)). +2. Create the virtual environment and install all extras plus development + dependencies: + + ```bash + uv sync --all-extras --dev + ``` + + This command creates a project-local `.venv/` directory that contains every + dependency required by the test suite and linters. + +3. Run the test suite through uv to ensure the managed environment is used: + + ```bash + uv run pytest + ``` + +4. The continuous integration workflow also runs formatting checks. Reproduce + them locally with: + + ```bash + uv run black --check . + uv run isort --check-only . 
+ ``` + ## Requirements - Python 3.8+ diff --git a/src/tinypg/__init__.py b/src/tinypg/__init__.py index c2cf4c4..5d7e6a4 100644 --- a/src/tinypg/__init__.py +++ b/src/tinypg/__init__.py @@ -161,7 +161,7 @@ from .config import TinyPGConfig from .context import async_database, database, database_pool -from .core import AsyncEphemeralDB, EphemeralDB +from .core import AsyncEphemeralDB, EphemeralDB, PersistentDB from .exceptions import ( BinaryNotFoundError, DatabaseStartError, @@ -181,6 +181,7 @@ __all__ = [ "EphemeralDB", "AsyncEphemeralDB", + "PersistentDB", "ExtensionSpec", "ExtensionManifest", "get_available_extension", diff --git a/src/tinypg/config.py b/src/tinypg/config.py index 59cd519..1c8f5f0 100644 --- a/src/tinypg/config.py +++ b/src/tinypg/config.py @@ -1,12 +1,23 @@ -""" -TinyPG configuration management. -""" +"""TinyPG configuration management.""" import os +import shutil +import tempfile from pathlib import Path from typing import Optional +def _default_cache_dir() -> Path: + """Compute the default cache directory for downloaded PostgreSQL binaries.""" + + if hasattr(os, "geteuid") and os.geteuid() == 0: + # When running as root prefer a shared location so that unprivileged + # helper processes can still access the binaries. 
+ return Path(tempfile.gettempdir()) / "tinypg" + + return Path.home() / ".tinypg" + + class TinyPGConfig: """Global configuration for TinyPG.""" @@ -14,7 +25,7 @@ class TinyPGConfig: default_version: str = "15" # Cache directory for PostgreSQL binaries - cache_dir: Path = Path.home() / ".tinypg" + cache_dir: Path = _default_cache_dir() # Automatic cleanup of databases auto_cleanup: bool = True @@ -28,6 +39,10 @@ class TinyPGConfig: # System temp directory override system_temp_dir: Optional[str] = None + # Runtime user/group used when dropping privileges for PostgreSQL helpers + runtime_user: Optional[str] = None + runtime_group: Optional[str] = None + @classmethod def set_cache_dir(cls, path: Path) -> None: """Set the directory for caching PostgreSQL binaries.""" @@ -50,4 +65,37 @@ def get_temp_dir(cls) -> Path: def get_cache_dir(cls) -> Path: """Get the cache directory, creating it if necessary.""" cls.cache_dir.mkdir(parents=True, exist_ok=True) + cls.cache_dir.chmod(0o755) + cls._migrate_legacy_cache() return cls.cache_dir + + @classmethod + def set_runtime_identity(cls, user: str, group: Optional[str] = None) -> None: + """Configure the user/group used to run PostgreSQL helpers.""" + + cls.runtime_user = user + cls.runtime_group = group + + @classmethod + def _migrate_legacy_cache(cls) -> None: + """Copy binaries from the legacy cache directory if necessary.""" + + legacy_dir = Path.home() / ".tinypg" + + if legacy_dir == cls.cache_dir or not legacy_dir.exists(): + return + + try: + for entry in legacy_dir.iterdir(): + destination = cls.cache_dir / entry.name + + if destination.exists(): + continue + + if entry.is_dir(): + shutil.copytree(entry, destination, dirs_exist_ok=True) + else: + shutil.copy2(entry, destination) + except OSError: + # Migration is best-effort; fallback to downloading if copying fails. 
+ pass diff --git a/src/tinypg/core.py b/src/tinypg/core.py index fe6d17e..eec49d5 100644 --- a/src/tinypg/core.py +++ b/src/tinypg/core.py @@ -2,7 +2,9 @@ import asyncio import getpass +import grp import os +import pwd import shutil import signal import subprocess @@ -62,6 +64,44 @@ def __init__( self.version = version or TinyPGConfig.default_version self.keep_data = keep_data self._extensions = self._normalize_extensions(extensions) + self._connection_user = getpass.getuser() + + # Determine the runtime user for PostgreSQL processes. When running as + # root (as happens inside many CI environments) we drop privileges to a + # less privileged user so that ``initdb`` and ``postgres`` accept the + # invocation. + self._runtime_uid = os.geteuid() + self._runtime_gid = os.getegid() + self._runtime_user = pwd.getpwuid(self._runtime_uid).pw_name + self._runtime_group = grp.getgrgid(self._runtime_gid).gr_name + self._drop_privileges = False + + if self._runtime_uid == 0: + configured_user = ( + TinyPGConfig.runtime_user + or os.environ.get("TINYPGRUNTIME_USER") + or "nobody" + ) + user_info = pwd.getpwnam(configured_user) + self._runtime_uid = user_info.pw_uid + self._runtime_user = user_info.pw_name + + configured_group = TinyPGConfig.runtime_group or os.environ.get( + "TINYPGRUNTIME_GROUP" + ) + if configured_group: + group_info = grp.getgrnam(configured_group) + self._runtime_gid = group_info.gr_gid + self._runtime_group = group_info.gr_name + else: + self._runtime_gid = user_info.pw_gid + self._runtime_group = grp.getgrgid(self._runtime_gid).gr_name + + self._drop_privileges = True + + # Ensure the cache directory is accessible when running helper + # processes under a different user. 
+ self._ensure_runtime_access(TinyPGConfig.get_cache_dir()) # Runtime state self._data_dir: Optional[Path] = data_dir @@ -115,11 +155,15 @@ def start(self) -> str: except Exception as e: # Clean up on failure - self.stop() + self._stop_impl() raise DatabaseStartError(f"Failed to start database: {e}") def stop(self) -> None: """Stop the database and clean up resources.""" + self._stop_impl() + + def _stop_impl(self) -> None: + """Internal implementation of :meth:`stop`.""" if not self._is_running: return @@ -184,7 +228,7 @@ def execute_sql(self, statement: Union[str, "Composable"]) -> None: import psycopg2 try: - user = getpass.getuser() + user = self._connection_user # Connect to the postgres database if self.port: @@ -243,6 +287,119 @@ def _normalize_extensions( return normalized + def _build_command_environment( + self, extra_env: Optional[Dict[str, str]] = None + ) -> Dict[str, str]: + """Create an environment mapping for PostgreSQL commands.""" + + env = os.environ.copy() + + if self.port: + env["PGPORT"] = str(self.port) + + if self._temp_dir: + env["PGHOST"] = str(self._temp_dir) + + if self._drop_privileges: + env.setdefault("HOME", str(self._temp_dir or self._data_dir or Path("."))) + env["USER"] = self._runtime_user + env["LOGNAME"] = self._runtime_user + + if extra_env: + env.update(extra_env) + + return env + + def _run_command( + self, + args: Sequence[str], + *, + cwd: Optional[Path] = None, + env: Optional[Dict[str, str]] = None, + capture_output: bool = False, + check: bool = True, + timeout: Optional[float] = None, + ) -> subprocess.CompletedProcess: + """Run a PostgreSQL helper command, dropping privileges if required.""" + + if env is None: + env = self._build_command_environment() + + run_kwargs: Dict[str, Any] = { + "cwd": cwd, + "env": env, + "check": check, + "timeout": timeout, + } + + if capture_output: + run_kwargs["capture_output"] = True + + if self._drop_privileges: + uid = self._runtime_uid + gid = self._runtime_gid + username = 
self._runtime_user + + def demote() -> None: + try: + groups = os.getgrouplist(username, gid) + except OSError: + groups = [gid] + + os.setgroups(groups) + os.setgid(gid) + os.setuid(uid) + + run_kwargs["preexec_fn"] = demote + + return subprocess.run( + args, **{k: v for k, v in run_kwargs.items() if v is not None} + ) + + def _ensure_directory_owner( + self, path: Path, mode: int = 0o700, *, create: bool = True + ) -> None: + """Ensure a directory exists with the correct ownership and mode.""" + + if create: + path.mkdir(parents=True, exist_ok=True) + + if not path.exists(): + return + + os.chmod(path, mode) + + if self._drop_privileges: + shutil.chown(path, self._runtime_uid, self._runtime_gid) + + def _ensure_runtime_access(self, path: Path) -> None: + """Ensure the runtime PostgreSQL user can traverse the given path.""" + + if not self._drop_privileges: + return + + try: + resolved = path.resolve() + except FileNotFoundError: + resolved = path + + temp_root = TinyPGConfig.get_temp_dir().resolve() + + if not resolved.is_relative_to(temp_root): + return + + current = resolved + + while True: + if current.exists(): + mode = current.stat().st_mode + os.chmod(current, mode | 0o111) + + if current == temp_root or current.parent == current: + break + + current = current.parent + def load_sql_file(self, file_path: Path) -> None: """Load and execute SQL from a file.""" if not file_path.exists(): @@ -260,6 +417,11 @@ def _initialize_database(self) -> Path: self._temp_dir = tempfile.mkdtemp(prefix="tinypg.") temp_path = Path(self._temp_dir) + # Ensure the PostgreSQL runtime user owns the temp directory so that + # initdb can create the cluster even when the main process runs as + # root. 
+ self._ensure_directory_owner(temp_path, create=False) + # Data directory for this PostgreSQL version data_dir = temp_path / self.version @@ -267,7 +429,7 @@ def _initialize_database(self) -> Path: # Run initdb initdb_path = PostgreSQLBinaries.get_binary_path("initdb", self.version) - subprocess.run( + self._run_command( [ str(initdb_path), "--nosync", @@ -277,10 +439,12 @@ def _initialize_database(self) -> Path: "UNICODE", "-A", "trust", + "-U", + self._connection_user, ], - check=True, capture_output=True, cwd=temp_path, + env=self._build_command_environment(), ) # Configure PostgreSQL for ephemeral use @@ -341,10 +505,10 @@ def _start_postgres_server(self) -> None: log_file = self._data_dir / "postgres.log" # Start server - subprocess.run( + self._run_command( [ str(pg_ctl_path), - "-W", # Don't wait for server to start + "-w", # Wait for server to become ready "-o", " ".join(server_opts), "-s", # Silent mode @@ -354,12 +518,12 @@ def _start_postgres_server(self) -> None: str(log_file), "start", ], - check=True, capture_output=True, + env=self._build_command_environment(), ) - # Give PostgreSQL more time to start, especially with persistent data - time.sleep(0.5) + # Give PostgreSQL a brief moment after pg_ctl reports success. 
+ time.sleep(0.1) except subprocess.CalledProcessError as e: raise DatabaseStartError(f"Failed to start PostgreSQL server: {e}") @@ -368,12 +532,10 @@ def _stop_postgres_server(self) -> None: """Stop the PostgreSQL server.""" try: pg_ctl_path = PostgreSQLBinaries.get_binary_path("pg_ctl", self.version) - env = self._get_pg_environment() - subprocess.run( - [str(pg_ctl_path), "-W", "-D", str(self._data_dir), "stop"], - env=env, - check=True, + self._run_command( + [str(pg_ctl_path), "-w", "-D", str(self._data_dir), "stop"], + env=self._build_command_environment(), capture_output=True, timeout=30, ) @@ -395,19 +557,12 @@ def _setup_cleanup(self) -> None: def _get_pg_environment(self) -> Dict[str, str]: """Get environment variables for PostgreSQL commands.""" - env = os.environ.copy() - - if self.port: - env["PGPORT"] = str(self.port) - - if self._temp_dir: - env["PGHOST"] = self._temp_dir - return env + return self._build_command_environment() def _build_connection_info(self) -> Dict[str, Any]: """Build connection information dictionary.""" - user = getpass.getuser() + user = self._connection_user if self.port: # Network connection @@ -480,3 +635,158 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit.""" await self.stop() + + +class PersistentDB(EphemeralDB): + """Manage a reusable PostgreSQL database for local development.""" + + def __init__( + self, + name: str, + *, + base_dir: Optional[Path] = None, + port: Optional[int] = None, + postgres_args: Optional[List[str]] = None, + version: str = None, + extensions: Optional[Sequence[ExtensionInput]] = None, + ) -> None: + """ + Create (or reuse) a persistent PostgreSQL database instance. + + Args: + name: Identifier used to group database files on disk. + base_dir: Optional directory used to store persistent data. When + omitted the database is stored inside ``TinyPGConfig``'s cache + directory under ``persistent/<name>/``. 
+ port: TCP port for the database (auto-assigned when ``None``). + postgres_args: Additional arguments passed to the ``postgres`` + server process. + version: PostgreSQL version identifier. Defaults to the + ``tinypg.config.TinyPGConfig`` value when ``None``. + extensions: Extensions to install after the server starts. + """ + + version = version or TinyPGConfig.default_version + + persistent_root = ( + Path(base_dir) + if base_dir is not None + else TinyPGConfig.get_cache_dir() / "persistent" / name + ) + persistent_root.mkdir(parents=True, exist_ok=True) + + version_dir = persistent_root / version + version_dir.mkdir(parents=True, exist_ok=True) + data_dir_path = version_dir / "data" + data_dir_exists = data_dir_path.exists() + + super().__init__( + port=port, + cleanup_timeout=0, + postgres_args=postgres_args, + data_dir=data_dir_path if data_dir_exists else None, + version=version, + keep_data=True, + extensions=extensions, + ) + + self._name = name + self._version_dir = version_dir + self._data_dir_path = data_dir_path + + self._ensure_runtime_access(self._version_dir) + # Ensure directories are owned by the runtime PostgreSQL user so that + # subsequent invocations can reuse the data safely. + self._ensure_directory_owner(persistent_root, mode=0o755, create=False) + self._ensure_directory_owner(self._version_dir, mode=0o755, create=False) + if data_dir_exists: + self._ensure_directory_owner(self._data_dir_path, create=False) + self._data_dir = self._data_dir_path + + self._temp_dir = str(self._version_dir) + + def start(self) -> str: + """Start the persistent database and return the connection URI.""" + + # Ensure socket directory is always set to the persistent location. 
+ self._temp_dir = str(self._version_dir) + self._ensure_existing_instance_stopped() + return super().start() + + def _initialize_database(self) -> Path: + """Initialize the persistent PostgreSQL database cluster.""" + + data_dir = self._data_dir_path + self._ensure_directory_owner(self._version_dir, mode=0o755) + self._temp_dir = str(self._version_dir) + + pg_version_file = data_dir / "PG_VERSION" + if pg_version_file.exists(): + self._ensure_directory_owner(data_dir, create=False) + return data_dir + + if data_dir.exists(): + shutil.rmtree(data_dir) + + initdb_path = PostgreSQLBinaries.get_binary_path("initdb", self.version) + + self._run_command( + [ + str(initdb_path), + "-D", + str(data_dir), + "-E", + "UNICODE", + "-A", + "trust", + "-U", + self._connection_user, + ], + capture_output=True, + cwd=self._version_dir, + env=self._build_command_environment(), + ) + + self._configure_postgresql(data_dir) + self._ensure_directory_owner(data_dir, create=False) + return data_dir + + def _ensure_existing_instance_stopped(self) -> None: + """Stop any lingering PostgreSQL instance using this data directory.""" + + if not self._data_dir_path.exists(): + return + + pid_file = self._data_dir_path / "postmaster.pid" + + if not pid_file.exists(): + return + + pg_ctl_path = PostgreSQLBinaries.get_binary_path("pg_ctl", self.version) + + try: + self._run_command( + [str(pg_ctl_path), "-w", "-D", str(self._data_dir_path), "stop"], + capture_output=True, + env=self._build_command_environment(), + ) + except subprocess.CalledProcessError: + try: + pid_file.unlink(missing_ok=True) + except OSError: + pass + + def _configure_postgresql(self, data_dir: Path) -> None: + """Configure PostgreSQL for persistent local usage.""" + + config_file = data_dir / "postgresql.conf" + config_additions = "\n".join( + [ + "# TinyPG persistent database configuration", + f"unix_socket_directories = '{self._temp_dir}'", + "listen_addresses = 'localhost'", + ] + ) + + with open(config_file, "a") as f: + 
f.write(config_additions + "\n") diff --git a/tests/test_persistent.py b/tests/test_persistent.py new file mode 100644 index 0000000..bc84672 --- /dev/null +++ b/tests/test_persistent.py @@ -0,0 +1,43 @@ +"""Tests for the PersistentDB helper.""" + +import uuid + +import psycopg2 + +import tinypg + + +def test_persistent_database(tmp_path): + """PersistentDB should reuse the same data directory across restarts.""" + + name = f"test-{uuid.uuid4().hex}" + base_dir = tmp_path / "persistent" + + db = tinypg.PersistentDB(name=name, base_dir=base_dir) + + try: + uri = db.start() + conn = psycopg2.connect(uri) + with conn.cursor() as cur: + cur.execute( + "CREATE TABLE IF NOT EXISTS persistent_test (id SERIAL PRIMARY KEY, value TEXT)" + ) + cur.execute("INSERT INTO persistent_test (value) VALUES ('hello')") + conn.commit() + conn.close() + finally: + db.stop() + + db_reuse = tinypg.PersistentDB(name=name, base_dir=base_dir) + + try: + uri = db_reuse.start() + conn = psycopg2.connect(uri) + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM persistent_test") + (count,) = cur.fetchone() + conn.close() + finally: + db_reuse.stop() + + assert count == 1 From 736a7ee2111c96611fd0228701b3d761ee6da860 Mon Sep 17 00:00:00 2001 From: Kevin Wenner Date: Sat, 27 Sep 2025 14:13:52 +0000 Subject: [PATCH 2/3] detect musl get around osx unix socket issue --- src/tinypg/binaries.py | 32 +++++++++++++++++++++++++++++++- src/tinypg/core.py | 20 ++++++++++++++++---- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/tinypg/binaries.py b/src/tinypg/binaries.py index 2148ced..6d88e06 100644 --- a/src/tinypg/binaries.py +++ b/src/tinypg/binaries.py @@ -80,7 +80,37 @@ def _detect_arch(self) -> str: def _get_platform_string(self) -> str: """Get the platform string for binary downloads.""" - return f"{self.os_name}-{self.arch}" + platform = f"{self.os_name}-{self.arch}" + + # TODO not returning alpine yet for testing + # if False and self.os_name == "linux" and 
self._using_musl: + # return f"{platform}-alpine" + + return platform + + def _detect_musl(self) -> bool: + """Return ``True`` when running on a musl-based libc such as Alpine.""" + if self.os_name != "linux": + return False + + libc, _ = platform.libc_ver() + if libc and libc.lower().startswith("musl"): + return True + + if Path("/etc/alpine-release").exists(): + return True + + try: + result = subprocess.run( + ["ldd", "--version"], capture_output=True, text=True, check=False + ) + combined_output = f"{result.stdout}\n{result.stderr}".lower() + if "musl" in combined_output: + return True + except FileNotFoundError: + pass + + return False @classmethod def ensure_version(cls, version: str) -> Path: diff --git a/src/tinypg/core.py b/src/tinypg/core.py index eec49d5..2119eed 100644 --- a/src/tinypg/core.py +++ b/src/tinypg/core.py @@ -3,6 +3,7 @@ import asyncio import getpass import grp +import hashlib import os import pwd import shutil @@ -694,6 +695,15 @@ def __init__( self._version_dir = version_dir self._data_dir_path = data_dir_path + socket_root = TinyPGConfig.get_temp_dir() / "tinypg" / "sockets" + socket_key = f"{name}-{version}-{persistent_root}" + socket_id = hashlib.sha256(socket_key.encode("utf-8")).hexdigest()[ + :8 + ] + self._socket_dir = socket_root / socket_id + + self._ensure_directory_owner(self._socket_dir, mode=0o755) + self._ensure_runtime_access(self._socket_dir) self._ensure_runtime_access(self._version_dir) # Ensure directories are owned by the runtime PostgreSQL user so that # subsequent invocations can reuse the data safely. @@ -703,13 +713,14 @@ def __init__( self._ensure_directory_owner(self._data_dir_path, create=False) self._data_dir = self._data_dir_path - self._temp_dir = str(self._version_dir) + self._temp_dir = str(self._socket_dir) def start(self) -> str: """Start the persistent database and return the connection URI.""" # Ensure socket directory is always set to the persistent location. 
- self._temp_dir = str(self._version_dir) + self._ensure_directory_owner(self._socket_dir, mode=0o755) + self._temp_dir = str(self._socket_dir) self._ensure_existing_instance_stopped() return super().start() @@ -718,7 +729,8 @@ def _initialize_database(self) -> Path: data_dir = self._data_dir_path self._ensure_directory_owner(self._version_dir, mode=0o755) - self._temp_dir = str(self._version_dir) + self._ensure_directory_owner(self._socket_dir, mode=0o755) + self._temp_dir = str(self._socket_dir) pg_version_file = data_dir / "PG_VERSION" if pg_version_file.exists(): @@ -783,7 +795,7 @@ def _configure_postgresql(self, data_dir: Path) -> None: config_additions = "\n".join( [ "# TinyPG persistent database configuration", - f"unix_socket_directories = '{self._temp_dir}'", + f"unix_socket_directories = '{self._socket_dir}'", "listen_addresses = 'localhost'", ] ) From bb1f4267cd290211c22742d6a6d574b116faeb30 Mon Sep 17 00:00:00 2001 From: Kevin Wenner Date: Sat, 27 Sep 2025 14:15:03 +0000 Subject: [PATCH 3/3] format --- src/tinypg/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tinypg/core.py b/src/tinypg/core.py index 2119eed..5a10bf5 100644 --- a/src/tinypg/core.py +++ b/src/tinypg/core.py @@ -697,9 +697,7 @@ def __init__( socket_root = TinyPGConfig.get_temp_dir() / "tinypg" / "sockets" socket_key = f"{name}-{version}-{persistent_root}" - socket_id = hashlib.sha256(socket_key.encode("utf-8")).hexdigest()[ - :8 - ] + socket_id = hashlib.sha256(socket_key.encode("utf-8")).hexdigest()[:8] self._socket_dir = socket_root / socket_id self._ensure_directory_owner(self._socket_dir, mode=0o755)