diff --git a/.gitignore b/.gitignore index 7c011af..5498a13 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,7 @@ cython_debug/ .pypirc /data/plugins/ +/data/box/ /debug/ uv.lock src/.DS_Store diff --git a/pyproject.toml b/pyproject.toml index 0631261..b16c25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langbot-plugin" -version = "0.3.7" +version = "0.4.0-beta.1" description = "This package contains the SDK, CLI for building plugins for LangBot, plus the runtime for hosting LangBot plugins" readme = "README.md" authors = [ @@ -9,7 +9,9 @@ authors = [ requires-python = ">=3.10" dependencies = [ "aiofiles>=24.1.0", + "aiohttp>=3.9.0", "dotenv>=0.9.9", + "e2b>=2.15", "httpx>=0.28.1", "jinja2>=3.1.6", "pip>=25.2", diff --git a/src/langbot_plugin/box/__init__.py b/src/langbot_plugin/box/__init__.py new file mode 100644 index 0000000..5bd86a4 --- /dev/null +++ b/src/langbot_plugin/box/__init__.py @@ -0,0 +1,5 @@ +"""LangBot Box runtime package.""" + +from .client import BoxRuntimeClient, ActionRPCBoxClient + +__all__ = ['BoxRuntimeClient', 'ActionRPCBoxClient'] diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py new file mode 100644 index 0000000..fea4da6 --- /dev/null +++ b/src/langbot_plugin/box/actions.py @@ -0,0 +1,34 @@ +"""Box-specific action types for the action RPC protocol.""" + +from __future__ import annotations + +from langbot_plugin.entities.io.actions.enums import ActionType + + +class LangBotToBoxAction(ActionType): + """Actions sent from LangBot to the Box runtime.""" + + INIT = "box_init" # Initialize with full box config (highest priority) + HEALTH = "box_health" + STATUS = "box_status" + EXEC = "box_exec" + CREATE_SESSION = "box_create_session" + GET_SESSION = "box_get_session" + GET_SESSIONS = "box_get_sessions" + DELETE_SESSION = "box_delete_session" + START_MANAGED_PROCESS = "box_start_managed_process" + GET_MANAGED_PROCESS = "box_get_managed_process" + STOP_MANAGED_PROCESS 
= "box_stop_managed_process" + GET_BACKEND_INFO = "box_get_backend_info" + LIST_SKILLS = "box_list_skills" + GET_SKILL = "box_get_skill" + CREATE_SKILL = "box_create_skill" + UPDATE_SKILL = "box_update_skill" + DELETE_SKILL = "box_delete_skill" + SCAN_SKILL_DIRECTORY = "box_scan_skill_directory" + LIST_SKILL_FILES = "box_list_skill_files" + READ_SKILL_FILE = "box_read_skill_file" + WRITE_SKILL_FILE = "box_write_skill_file" + PREVIEW_SKILL_ZIP = "box_preview_skill_zip" + INSTALL_SKILL_ZIP = "box_install_skill_zip" + SHUTDOWN = "box_shutdown" diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py new file mode 100644 index 0000000..37ffbe3 --- /dev/null +++ b/src/langbot_plugin/box/backend.py @@ -0,0 +1,411 @@ +from __future__ import annotations + +import abc +import asyncio +import dataclasses +import datetime as dt +import logging +import re +import shlex +import shutil +import uuid + +from .errors import BoxError +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from .security import validate_sandbox_security + +# Hard cap on raw subprocess output to prevent unbounded memory usage. +# Container timeout already bounds duration, but fast commands can still +# produce large output within the time limit. After this many bytes the +# remaining output is discarded before decoding. 
class BaseSandboxBackend(abc.ABC):
    """Abstract contract shared by every sandbox backend.

    Concrete backends must implement availability probing, session start,
    command execution and session stop. The remaining hooks have defaults
    that a backend may override.
    """

    # Short backend identifier; subclasses set it (used in log and error text).
    name: str
    # Identifier of the running Box instance; empty until assigned externally.
    instance_id: str = ''

    def __init__(self, logger: logging.Logger):
        self.logger = logger

    async def initialize(self):
        """Optional async setup hook; the default does nothing."""
        return

    @abc.abstractmethod
    async def is_available(self) -> bool:
        """Report whether this backend can currently be used."""

    @abc.abstractmethod
    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
        """Create a sandbox session for *spec* and describe it."""

    @abc.abstractmethod
    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
        """Run the command described by *spec* inside *session*."""

    @abc.abstractmethod
    async def stop_session(self, session: BoxSessionInfo):
        """Tear down *session*."""

    async def is_session_alive(self, session: BoxSessionInfo) -> bool:
        """Default liveness check: every session is reported as alive."""
        return True

    async def start_managed_process(self, session: BoxSessionInfo, spec):
        """Start a long-running process in *session*.

        Raises:
            BoxError: always, unless a subclass overrides this method.
        """
        raise BoxError(f'{self.name} backend does not support managed processes')

    async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
        """Remove lingering containers from previous runs. No-op by default."""
        return
async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
    """Run ``spec.cmd`` inside the session's container via ``<cli> exec``.

    Each entry of ``spec.env`` is forwarded as a ``-e KEY=VALUE`` flag and the
    command runs through ``sh -lc`` after changing into ``spec.workdir``.

    Returns:
        A result with status ``TIMED_OUT`` (``exit_code`` is ``None``) when the
        CLI call exceeded ``spec.timeout_sec``, otherwise ``COMPLETED`` with
        the command's exit code.
    """
    started_at = dt.datetime.now(dt.timezone.utc)

    env_flags: list[str] = []
    for key, value in spec.env.items():
        env_flags += ['-e', f'{key}={value}']

    argv = [
        self.command,
        'exec',
        *env_flags,
        session.backend_session_id,
        'sh',
        '-lc',
        self._build_exec_command(spec.workdir, spec.cmd),
    ]

    # Keep the log line bounded: at most 400 characters of the command.
    preview = spec.cmd.strip()
    if len(preview) > 400:
        preview = preview[:397] + '...'
    self.logger.info(
        f'LangBot Box backend exec: backend={self.name} '
        f'session_id={session.session_id} container_name={session.backend_session_id} '
        f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} '
        f'env_keys={sorted(spec.env.keys())} cmd={preview}'
    )

    outcome = await self._run_command(argv, timeout_sec=spec.timeout_sec, check=False)
    elapsed = dt.datetime.now(dt.timezone.utc) - started_at
    duration_ms = int(elapsed.total_seconds() * 1000)

    if outcome.timed_out:
        status = BoxExecutionStatus.TIMED_OUT
        exit_code = None
        # Fall back to a generic notice only when the command wrote no stderr.
        stderr = outcome.stderr or f'Command timed out after {spec.timeout_sec} seconds.'
    else:
        status = BoxExecutionStatus.COMPLETED
        exit_code = outcome.return_code
        stderr = outcome.stderr

    return BoxExecutionResult(
        session_id=session.session_id,
        backend_name=self.name,
        status=status,
        exit_code=exit_code,
        stdout=outcome.stdout,
        stderr=stderr,
        duration_ms=duration_ms,
    )
+ """ + result = await self._run_command( + [ + self.command, + 'ps', + '-a', + '--filter', + 'label=langbot.box=true', + '--format', + '{{.ID}}\t{{.Label "langbot.box.instance_id"}}', + ], + timeout_sec=10, + check=False, + ) + if result.return_code != 0 or not result.stdout.strip(): + return + orphan_ids = [] + for line in result.stdout.strip().split('\n'): + line = line.strip() + if not line: + continue + parts = line.split('\t', 1) + cid = parts[0].strip() + label_instance = parts[1].strip() if len(parts) > 1 else '' + if label_instance != current_instance_id: + orphan_ids.append(cid) + if not orphan_ids: + return + for cid in orphan_ids: + self.logger.info(f'Cleaning up orphaned Box container: {cid}') + await self._run_command( + [self.command, 'rm', '-f', *orphan_ids], + timeout_sec=30, + check=False, + ) + + async def start_managed_process(self, session: BoxSessionInfo, spec) -> asyncio.subprocess.Process: + args = [self.command, 'exec', '-i'] + + for key, value in spec.env.items(): + args.extend(['-e', f'{key}={value}']) + + args.extend( + [ + session.backend_session_id, + 'sh', + '-lc', + self._build_spawn_command(spec.cwd, spec.command, spec.args), + ] + ) + + self.logger.info( + f'LangBot Box backend start_managed_process: backend={self.name} ' + f'session_id={session.session_id} container_name={session.backend_session_id} ' + f'cwd={spec.cwd} env_keys={sorted(spec.env.keys())} command={spec.command} args={spec.args}' + ) + + return await asyncio.create_subprocess_exec( + *args, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + def _build_container_name(self, session_id: str) -> str: + normalized = re.sub(r'[^a-zA-Z0-9_.-]+', '-', session_id).strip('-').lower() or 'session' + suffix = uuid.uuid4().hex[:8] + return f'langbot-box-{normalized[:32]}-{suffix}' + + def _build_exec_command(self, workdir: str, cmd: str) -> str: + quoted_workdir = shlex.quote(workdir) + return f'mkdir -p {quoted_workdir} 
async def _run_command(
    self,
    args: list[str],
    timeout_sec: int,
    check: bool,
) -> _CommandResult:
    """Run a backend CLI command and capture its size-capped output.

    Args:
        args: Full argv, executable first; executed without a shell.
        timeout_sec: Seconds to wait before the child process is killed.
        check: When true, a non-zero exit code raises instead of returning.

    Returns:
        The decoded stdout/stderr and exit code. On timeout ``timed_out`` is
        true and ``return_code`` is ``-1`` (no error is raised even when
        *check* is set).

    Raises:
        BoxError: *check* is true and the command exited non-zero.
    """
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    # Drain both pipes while waiting so a chatty child cannot block on a full
    # pipe buffer and never exit.
    stdout_task = asyncio.create_task(self._read_stream(process.stdout))
    stderr_task = asyncio.create_task(self._read_stream(process.stderr))

    timed_out = False
    try:
        try:
            await asyncio.wait_for(process.wait(), timeout=timeout_sec)
        except asyncio.TimeoutError:
            timed_out = True
            try:
                process.kill()
            except ProcessLookupError:
                # The child exited between the timeout firing and the kill.
                pass
            await process.wait()

        stdout_bytes, stdout_total = await stdout_task
        stderr_bytes, stderr_total = await stderr_task
    except BaseException:
        # Cancellation or an unexpected failure: do not leave the reader
        # tasks or the child process running behind the caller's back.
        stdout_task.cancel()
        stderr_task.cancel()
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass
        raise

    stdout = self._clip_captured_bytes(stdout_bytes, stdout_total)
    stderr = self._clip_captured_bytes(stderr_bytes, stderr_total)

    if timed_out:
        return _CommandResult(
            return_code=-1,
            stdout=stdout,
            stderr=stderr,
            timed_out=True,
        )

    if check and process.returncode != 0:
        raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error'))

    return _CommandResult(
        return_code=process.returncode,
        stdout=stdout,
        stderr=stderr,
        timed_out=False,
    )
[raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]' + return text + + @staticmethod + async def _read_stream( + stream: asyncio.StreamReader | None, + limit: int = _MAX_RAW_OUTPUT_BYTES, + ) -> tuple[bytes, int]: + if stream is None: + return b'', 0 + + chunks = bytearray() + total_size = 0 + while True: + chunk = await stream.read(65536) + if not chunk: + break + total_size += len(chunk) + remaining = limit - len(chunks) + if remaining > 0: + chunks.extend(chunk[:remaining]) + + return bytes(chunks), total_size + + def _format_cli_error(self, message: str) -> str: + message = ' '.join(message.split()) + if len(message) > 300: + message = f'{message[:297]}...' + return f'{self.name} backend error: {message}' + + +class DockerBackend(CLISandboxBackend): + def __init__(self, logger: logging.Logger): + super().__init__(logger=logger, command='docker', backend_name='docker') diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py new file mode 100644 index 0000000..dc3f78d --- /dev/null +++ b/src/langbot_plugin/box/client.py @@ -0,0 +1,377 @@ +"""BoxRuntimeClient abstraction for Box Runtime access.""" + +from __future__ import annotations + +import abc +import logging +from typing import Any + +from langbot_plugin.runtime.io.handler import Handler + +from .actions import LangBotToBoxAction +from .errors import BoxError, BoxRuntimeUnavailableError +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxManagedProcessInfo, + BoxManagedProcessSpec, + BoxSpec, +) + + +class BoxRuntimeClient(abc.ABC): + """Abstract interface that BoxService uses to talk to a Box Runtime.""" + + @abc.abstractmethod + async def initialize(self) -> None: ... + + @abc.abstractmethod + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: ... + + @abc.abstractmethod + async def shutdown(self) -> None: ... + + @abc.abstractmethod + async def get_status(self) -> dict: ... 
+ + @abc.abstractmethod + async def get_sessions(self) -> list[dict]: ... + + @abc.abstractmethod + async def get_backend_info(self) -> dict: ... + + @abc.abstractmethod + async def delete_session(self, session_id: str) -> None: ... + + @abc.abstractmethod + async def create_session(self, spec: BoxSpec) -> dict: ... + + @abc.abstractmethod + async def start_managed_process( + self, session_id: str, spec: BoxManagedProcessSpec + ) -> BoxManagedProcessInfo: ... + + @abc.abstractmethod + async def get_managed_process( + self, session_id: str, process_id: str = "default" + ) -> BoxManagedProcessInfo: ... + + @abc.abstractmethod + async def stop_managed_process( + self, session_id: str, process_id: str = "default" + ) -> None: ... + + @abc.abstractmethod + async def get_session(self, session_id: str) -> dict: ... + + @abc.abstractmethod + async def init(self, config: dict) -> None: ... + + async def list_skills(self) -> list[dict]: + raise NotImplementedError + + async def get_skill(self, name: str) -> dict | None: + raise NotImplementedError + + async def create_skill(self, skill: dict) -> dict: + raise NotImplementedError + + async def update_skill(self, name: str, skill: dict) -> dict: + raise NotImplementedError + + async def delete_skill(self, name: str) -> None: + raise NotImplementedError + + async def scan_skill_directory(self, path: str) -> dict: + raise NotImplementedError + + async def list_skill_files( + self, + name: str, + path: str = ".", + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + raise NotImplementedError + + async def read_skill_file(self, name: str, path: str) -> dict: + raise NotImplementedError + + async def write_skill_file(self, name: str, path: str, content: str) -> dict: + raise NotImplementedError + + async def preview_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_subdir: str = "", + target_suffix: str = "upload", + ) -> list[dict]: + raise NotImplementedError + + async def install_skill_zip( + 
def _translate_action_error(exc: Exception) -> BoxError:
    """Map an action-call failure back to the matching ``BoxError`` subclass.

    The stringified exception is searched for a known ``<ClassName>:`` marker;
    the first marker found (in the order listed below) selects the error class.
    When no marker is present a plain ``BoxError`` is returned. The original
    message text is always kept unchanged.
    """
    from .errors import (
        BoxBackendUnavailableError,
        BoxManagedProcessConflictError,
        BoxManagedProcessNotFoundError,
        BoxSessionConflictError,
        BoxSessionNotFoundError,
        BoxValidationError,
    )

    text = str(exc)
    markers: tuple[tuple[str, type[BoxError]], ...] = (
        ("BoxValidationError:", BoxValidationError),
        ("BoxSessionNotFoundError:", BoxSessionNotFoundError),
        ("BoxSessionConflictError:", BoxSessionConflictError),
        ("BoxManagedProcessNotFoundError:", BoxManagedProcessNotFoundError),
        ("BoxManagedProcessConflictError:", BoxManagedProcessConflictError),
        ("BoxBackendUnavailableError:", BoxBackendUnavailableError),
    )
    error_cls = next((cls for marker, cls in markers if marker in text), BoxError)
    return error_cls(text)
async def shutdown(self) -> None:
    """Ask the Box runtime to shut down, then drop the handler.

    The request is best-effort: a failure (for example the runtime is already
    gone) is logged at debug level rather than raised. The handler reference
    is cleared in all cases, including when the call is cancelled, so the
    client never keeps pointing at a runtime it tried to stop.
    """
    if self._handler is None:
        return

    try:
        await self._call(LangBotToBoxAction.SHUTDOWN, {})
    except Exception as exc:
        # Deliberately not re-raised: shutdown must not fail the caller.
        self._logger.debug('LangBot Box runtime shutdown request failed: %s', exc)
    finally:
        self._handler = None
def get_managed_process_websocket_url(
    self, session_id: str, ws_relay_base_url: str, process_id: str = "default"
) -> str:
    """Build the WebSocket URL for a managed process behind the relay.

    Args:
        session_id: Session that owns the managed process.
        ws_relay_base_url: Relay base URL. ``https://`` maps to ``wss://`` and
            ``http://`` to ``ws://``; a base that already uses ``ws://`` or
            ``wss://`` is kept; a base without a scheme is treated as
            ``ws://``. Trailing slashes are ignored.
        process_id: Managed process identifier within the session.

    Returns:
        ``<scheme><host>/v1/sessions/<session_id>/managed-process/<process_id>/ws``.
    """
    # Drop trailing slashes so the joined path never contains "//v1".
    base = ws_relay_base_url.rstrip("/")
    scheme_map = (
        ("https://", "wss://"),
        ("http://", "ws://"),
        ("wss://", "wss://"),
        ("ws://", "ws://"),
    )
    lowered = base.lower()  # URL schemes are case-insensitive
    for prefix, ws_scheme in scheme_map:
        if lowered.startswith(prefix):
            scheme = ws_scheme
            suffix = base[len(prefix):]
            break
    else:
        scheme = "ws://"
        suffix = base
    return (
        f"{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws"
    )
list_skill_files( + self, + name: str, + path: str = ".", + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + return await self._call( + LangBotToBoxAction.LIST_SKILL_FILES, + { + "name": name, + "path": path, + "include_hidden": include_hidden, + "max_entries": max_entries, + }, + ) + + async def read_skill_file(self, name: str, path: str) -> dict: + return await self._call( + LangBotToBoxAction.READ_SKILL_FILE, {"name": name, "path": path} + ) + + async def write_skill_file(self, name: str, path: str, content: str) -> dict: + return await self._call( + LangBotToBoxAction.WRITE_SKILL_FILE, + {"name": name, "path": path, "content": content}, + ) + + async def preview_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_subdir: str = "", + target_suffix: str = "upload", + ) -> list[dict]: + file_key = await self.handler.send_file(file_bytes, "zip") + data = await self._call( + LangBotToBoxAction.PREVIEW_SKILL_ZIP, + { + "file_key": file_key, + "filename": filename, + "source_subdir": source_subdir, + "target_suffix": target_suffix, + }, + timeout=60.0, + ) + return data["skills"] + + async def install_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_paths: list[str] | None = None, + source_path: str = "", + source_subdir: str = "", + target_suffix: str = "upload", + ) -> list[dict]: + file_key = await self.handler.send_file(file_bytes, "zip") + data = await self._call( + LangBotToBoxAction.INSTALL_SKILL_ZIP, + { + "file_key": file_key, + "filename": filename, + "source_paths": source_paths or [], + "source_path": source_path, + "source_subdir": source_subdir, + "target_suffix": target_suffix, + }, + timeout=120.0, + ) + return data["skills"] diff --git a/src/langbot_plugin/box/e2b_backend.py b/src/langbot_plugin/box/e2b_backend.py new file mode 100644 index 0000000..46ee031 --- /dev/null +++ b/src/langbot_plugin/box/e2b_backend.py @@ -0,0 +1,429 @@ +from __future__ import annotations + +import datetime as dt +import 
def _check_e2b_available(force: bool = False) -> bool:
    """Report whether the optional ``e2b`` package can be imported.

    The outcome is cached in module globals; on success the imported
    ``AsyncSandbox`` and ``CommandResult`` classes are cached alongside it.

    Args:
        force: Ignore a cached outcome and attempt the import again.
    """
    global _e2b_available, _AsyncSandbox, _CommandResult

    has_cached_answer = _e2b_available is not None
    if has_cached_answer and not force:
        return _e2b_available

    try:
        from e2b import AsyncSandbox as sandbox_cls, CommandResult as result_cls
    except ImportError:
        _e2b_available = False
    else:
        _AsyncSandbox = sandbox_cls
        _CommandResult = result_cls
        _e2b_available = True

    return _e2b_available
+ """ + if path == '/workspace' or path.startswith('/workspace/'): + return path.replace('/workspace', E2B_WORKSPACE_DIR, 1) + return path + + +def _rewrite_command_paths_for_e2b(command: str) -> str: + """Rewrite LangBot's logical /workspace paths for E2B's real writable path.""" + return command.replace('/workspace', E2B_WORKSPACE_DIR) + + +class E2BSandboxBackend(BaseSandboxBackend): + """E2B/CubeSandbox sandbox backend. + + Supports both E2B cloud service and self-hosted CubeSandbox. + Configuration sources (priority from high to low): + 1. Environment variables: E2B_API_KEY, E2B_API_URL + 2. Configuration passed via configure() method (from LangBot config.yaml) + """ + + name = 'e2b' + + def __init__(self, logger: logging.Logger): + super().__init__(logger) + self._api_key: str | None = None + self._api_url: str | None = None + self._template: str | None = None + self._config_from_langbot: dict = {} + + def configure(self, config: dict) -> None: + """Apply configuration from LangBot config.yaml. + + Environment variables take precedence over config.yaml values. + """ + self._config_from_langbot = config + # Reset cache to force re-check if e2b package was installed later + _reset_e2b_cache() + + async def initialize(self): + """Load configuration from environment variables (priority) or config.yaml.""" + # Environment variables take precedence + self._api_key = os.getenv('E2B_API_KEY') or self._config_from_langbot.get('api_key') + self._api_url = os.getenv('E2B_API_URL') or self._config_from_langbot.get('api_url') + self._template = self._config_from_langbot.get('template') + + async def is_available(self) -> bool: + """Check if E2B backend is available. + + Returns True if: + 1. e2b package is installed + 2. 
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
    """Create a new E2B sandbox session.

    Maps BoxSpec fields to ``AsyncSandbox.create()`` parameters:

    - template: ``spec.image`` unless it is the default LangBot Docker image,
      otherwise the configured template, otherwise the SDK default
    - envs: ``spec.env``
    - api_key / domain: credentials resolved by ``initialize()``
    - metadata: CubeSandbox ``host-mount`` configuration

    ``spec.network`` is NOT forwarded; network access is whatever the
    template grants. A warning is logged when isolation was requested.

    Raises:
        BoxError: the e2b package is missing or sandbox creation failed
            (the original exception is chained as ``__cause__``).
    """
    validate_sandbox_security(spec)

    if not _check_e2b_available():
        raise BoxError('e2b package not installed')

    now = dt.datetime.now(dt.timezone.utc)

    # Only the mount target needs adapting here; the working directory is
    # adapted per command at exec time.
    mount_path = _adapt_path_for_e2b(spec.mount_path)

    create_kwargs = {}

    # The default LangBot image name is a Docker image, not an E2B template,
    # so it is never passed through as a template id.
    if spec.image and spec.image != 'rockchin/langbot-sandbox:latest':
        create_kwargs['template'] = spec.image
    elif self._template:
        create_kwargs['template'] = self._template

    if spec.env:
        create_kwargs['envs'] = spec.env

    # API key and domain (the latter for a self-hosted CubeSandbox).
    if self._api_key:
        create_kwargs['api_key'] = self._api_key
    if self._api_url:
        # E2B SDK uses 'domain' for a self-hosted API URL
        create_kwargs['domain'] = self._api_url

    # CubeSandbox reads host mounts from a JSON-encoded metadata entry.
    if spec.host_path and spec.host_path_mode != BoxHostMountMode.NONE:
        create_kwargs['metadata'] = {
            'host-mount': json.dumps([{
                'hostPath': spec.host_path,
                'mountPath': mount_path,
                'readOnly': spec.host_path_mode == BoxHostMountMode.READ_ONLY,
            }])
        }

    if spec.network == BoxNetworkMode.OFF:
        # Make the gap visible: the caller asked for isolation that this
        # backend does not enforce itself.
        self.logger.warning(
            f'LangBot Box backend start_session: backend=e2b session_id={spec.session_id} '
            f'requested network=off is not enforced; network access depends on the template'
        )

    self.logger.info(
        f'LangBot Box backend start_session: backend=e2b '
        f'session_id={spec.session_id} '
        f'template={create_kwargs.get("template", "default")} '
        f'network={spec.network.value} '
        f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={mount_path} '
        f'env_keys={sorted(spec.env.keys())}'
    )

    try:
        sandbox = await _AsyncSandbox.create(**create_kwargs)
    except Exception as exc:
        raise BoxError(f'Failed to create E2B sandbox: {exc}') from exc

    return BoxSessionInfo(
        session_id=spec.session_id,
        backend_name=self.name,
        backend_session_id=sandbox.sandbox_id,
        image=spec.image,
        network=spec.network,
        host_path=spec.host_path,
        host_path_mode=spec.host_path_mode,
        # Keep the logical mount path in session metadata. The runtime
        # compares future BoxSpec objects against this value when reusing
        # sessions; storing the E2B-internal path here makes every later
        # /workspace request look incompatible.
        mount_path=spec.mount_path,
        persistent=spec.persistent,
        cpus=spec.cpus,
        memory_mb=spec.memory_mb,
        pids_limit=spec.pids_limit,
        read_only_rootfs=spec.read_only_rootfs,
        workspace_quota_mb=spec.workspace_quota_mb,
        created_at=now,
        last_used_at=now,
    )
+        workdir = _adapt_path_for_e2b(spec.workdir)
+        command = _rewrite_command_paths_for_e2b(spec.cmd)
+
+        # Log a bounded preview so very long commands do not flood the log.
+        cmd_preview = spec.cmd.strip()
+        if len(cmd_preview) > 400:
+            cmd_preview = f'{cmd_preview[:397]}...'
+        self.logger.info(
+            f'LangBot Box backend exec: backend=e2b '
+            f'session_id={session.session_id} sandbox_id={session.backend_session_id} '
+            f'workdir={workdir} timeout_sec={spec.timeout_sec} '
+            f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
+        )
+
+        try:
+            sandbox = await _AsyncSandbox.connect(
+                sandbox_id=session.backend_session_id,
+                **connect_kwargs
+            )
+        except Exception as exc:
+            # Chain the cause so the SDK traceback is not lost.
+            raise BoxError(f'Failed to connect to E2B sandbox: {exc}') from exc
+
+        await self._sync_mounts_to_e2b(sandbox, spec)
+
+        # Run the command
+        # Note: E2B requires cwd to exist before running command. We create it
+        # as part of the command and then run from that directory.
+        run_kwargs = {
+            'cmd': f'mkdir -p {shlex.quote(workdir)} && cd {shlex.quote(workdir)} && {command}',
+            'timeout': spec.timeout_sec,
+        }
+        if spec.env:
+            run_kwargs['envs'] = spec.env
+
+        try:
+            result = await sandbox.commands.run(**run_kwargs)
+        except Exception as exc:
+            duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000)
+            # NOTE(review): the e2b SDK is documented to raise CommandExitException
+            # (carrying exit_code/stdout/stderr) when a command exits non-zero --
+            # confirm against the pinned e2b version. That is a completed command,
+            # not a backend failure, so report it as a normal result. Duck-typed
+            # on exit_code to avoid importing SDK exception classes here.
+            exit_code = getattr(exc, 'exit_code', None)
+            if isinstance(exit_code, int):
+                await self._sync_mounts_from_e2b(sandbox, spec)
+                return BoxExecutionResult(
+                    session_id=session.session_id,
+                    backend_name=self.name,
+                    status=BoxExecutionStatus.COMPLETED,
+                    exit_code=exit_code,
+                    stdout=self._truncate_output(getattr(exc, 'stdout', '') or ''),
+                    stderr=self._truncate_output(getattr(exc, 'stderr', '') or ''),
+                    duration_ms=duration_ms,
+                )
+            # Check if it's a timeout (done after the exit-code check so command
+            # output that merely mentions "timeout" is not misclassified).
+            error_msg = str(exc)
+            if 'timeout' in error_msg.lower() or 'timed out' in error_msg.lower():
+                return BoxExecutionResult(
+                    session_id=session.session_id,
+                    backend_name=self.name,
+                    status=BoxExecutionStatus.TIMED_OUT,
+                    exit_code=None,
+                    stdout='',
+                    stderr=f'Command timed out after {spec.timeout_sec} seconds.',
+                    duration_ms=duration_ms,
+                )
+            raise BoxError(f'E2B command execution failed: {exc}') from exc
+
+        await self._sync_mounts_from_e2b(sandbox, spec)
+
+        duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000)
+
+        # Process output - apply truncation if needed
+        stdout = self._truncate_output(result.stdout or '')
+        stderr = self._truncate_output(result.stderr or '')
+
+        return BoxExecutionResult( +
session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=result.exit_code, + stdout=stdout, + stderr=stderr, + duration_ms=duration_ms, + ) + + async def _sync_mounts_to_e2b(self, sandbox, spec: BoxSpec) -> None: + """Best-effort upload of all logical mounts into public E2B.""" + if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + await self._sync_host_tree_to_e2b( + sandbox, + host_root=spec.host_path, + remote_root=_adapt_path_for_e2b(spec.mount_path), + ) + + for mount in spec.extra_mounts: + if mount.mode == BoxHostMountMode.NONE: + continue + await self._sync_host_tree_to_e2b( + sandbox, + host_root=mount.host_path, + remote_root=_adapt_path_for_e2b(mount.mount_path), + ) + + async def _sync_mounts_from_e2b(self, sandbox, spec: BoxSpec) -> None: + """Best-effort download of writable E2B mounts into host paths.""" + if spec.host_path is not None and spec.host_path_mode == BoxHostMountMode.READ_WRITE: + await self._sync_e2b_tree_to_host( + sandbox, + remote_root=_adapt_path_for_e2b(spec.mount_path), + host_root=spec.host_path, + ) + + for mount in spec.extra_mounts: + if mount.mode != BoxHostMountMode.READ_WRITE: + continue + await self._sync_e2b_tree_to_host( + sandbox, + remote_root=_adapt_path_for_e2b(mount.mount_path), + host_root=mount.host_path, + ) + + async def _sync_host_tree_to_e2b(self, sandbox, *, host_root: str, remote_root: str) -> None: + """Best-effort sync for public E2B, which has no local bind mounts.""" + if not os.path.isdir(host_root): + return + + for root, dirs, files in os.walk(host_root): + dirs[:] = [d for d in dirs if d not in {'.git', '__pycache__', '.venv', 'node_modules'}] + rel_dir = os.path.relpath(root, host_root) + remote_dir = remote_root if rel_dir == '.' 
else posixpath.join(remote_root, rel_dir.replace(os.sep, '/')) + try: + await sandbox.commands.run(f'mkdir -p {shlex.quote(remote_dir)}', timeout=10) + except Exception as exc: + self.logger.debug(f'Failed to create E2B sync dir {remote_dir}: {exc}') + continue + + for filename in files: + host_file = os.path.join(root, filename) + try: + if os.path.getsize(host_file) > _MAX_RAW_OUTPUT_BYTES: + continue + with open(host_file, 'rb') as f: + data = f.read() + remote_file = posixpath.join(remote_dir, filename) + await sandbox.files.write(remote_file, data) + except Exception as exc: + self.logger.debug(f'Failed to sync host file to E2B {host_file}: {exc}') + + async def _sync_e2b_tree_to_host(self, sandbox, *, remote_root: str, host_root: str) -> None: + """Best-effort download of an E2B mount into the matching host path.""" + os.makedirs(host_root, exist_ok=True) + try: + entries = await sandbox.files.list(remote_root, depth=16) + except Exception as exc: + self.logger.debug(f'Failed to list E2B mount for sync {remote_root}: {exc}') + return + + for entry in entries: + remote_path = str(getattr(entry, 'path', '') or '') + if not remote_path or remote_path == remote_root or not remote_path.startswith(remote_root + '/'): + continue + rel_path = remote_path[len(remote_root) :].lstrip('/') + real_host_root = os.path.realpath(host_root) + host_path = os.path.realpath(os.path.join(real_host_root, *rel_path.split('/'))) + if not (host_path == real_host_root or host_path.startswith(real_host_root + os.sep)): + continue + + entry_type = getattr(getattr(entry, 'type', None), 'value', '') + try: + if entry_type == 'dir': + os.makedirs(host_path, exist_ok=True) + elif entry_type == 'file': + os.makedirs(os.path.dirname(host_path), exist_ok=True) + data = await sandbox.files.read(remote_path, format='bytes') + with open(host_path, 'wb') as f: + f.write(bytes(data)) + except Exception as exc: + self.logger.debug(f'Failed to sync E2B file to host {remote_path}: {exc}') + + async 
def stop_session(self, session: BoxSessionInfo):
+        """Kill the E2B sandbox."""
+        self.logger.info(
+            f'LangBot Box backend stop_session: backend=e2b '
+            f'session_id={session.session_id} sandbox_id={session.backend_session_id}'
+        )
+
+        if not _check_e2b_available():
+            return  # Nothing to do if package not available
+
+        try:
+            await _AsyncSandbox.kill(
+                sandbox_id=session.backend_session_id,
+                api_key=self._api_key,
+                domain=self._api_url,
+            )
+        except Exception as exc:
+            self.logger.warning(f'Failed to kill E2B sandbox: {exc}')
+
+    def _truncate_output(self, output: str, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
+        """Clip output to at most ``limit`` UTF-8 bytes, then append a clip marker."""
+        encoded = output.encode('utf-8', errors='replace')
+        if len(encoded) > limit:
+            # Cut bytes, not characters, so multi-byte text honours the limit; a split trailing character is dropped.
+            clipped = encoded[:limit].decode('utf-8', errors='ignore')
+            return f'{clipped}\n... [output clipped at {limit} bytes]'
+        return output
diff --git a/src/langbot_plugin/box/errors.py b/src/langbot_plugin/box/errors.py
new file mode 100644
index 0000000..ecdde7a
--- /dev/null
+++ b/src/langbot_plugin/box/errors.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+
+class BoxError(RuntimeError):
+    """Base error for LangBot Box failures."""
+
+
+class BoxValidationError(BoxError):
+    """Raised when exec tool arguments are invalid."""
+
+
+class BoxBackendUnavailableError(BoxError):
+    """Raised when no supported container backend is available."""
+
+
+class BoxRuntimeUnavailableError(BoxError):
+    """Raised when the standalone Box Runtime service is unavailable."""
+
+
+class BoxSessionConflictError(BoxError):
+    """Raised when an existing session cannot satisfy a new request."""
+
+
+class BoxSessionNotFoundError(BoxError):
+    """Raised when a referenced session does not exist."""
+
+
+class BoxManagedProcessConflictError(BoxError):
+    """Raised when a session already has an active managed process."""
+
+
+class BoxManagedProcessNotFoundError(BoxError):
+    """Raised when a referenced managed process does not exist."""
diff --git
a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py new file mode 100644 index 0000000..fa34e36 --- /dev/null +++ b/src/langbot_plugin/box/models.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import datetime as dt +import enum +import ntpath +import posixpath + +import pydantic + + +DEFAULT_BOX_IMAGE = 'rockchin/langbot-sandbox:latest' +DEFAULT_BOX_MOUNT_PATH = '/workspace' + + +class BoxNetworkMode(str, enum.Enum): + OFF = 'off' + ON = 'on' + + +class BoxExecutionStatus(str, enum.Enum): + COMPLETED = 'completed' + TIMED_OUT = 'timed_out' + + +class BoxHostMountMode(str, enum.Enum): + NONE = 'none' + READ_ONLY = 'ro' + READ_WRITE = 'rw' + + +class BoxManagedProcessStatus(str, enum.Enum): + RUNNING = 'running' + EXITED = 'exited' + + +class BoxMountSpec(pydantic.BaseModel): + """A single additional bind mount specification.""" + + host_path: str + mount_path: str + mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + + @pydantic.field_validator('host_path') + @classmethod + def validate_host_path(cls, value: str) -> str: + value = value.strip() + if not (posixpath.isabs(value) or ntpath.isabs(value)): + raise ValueError('host_path must be an absolute host path') + return value + + @pydantic.field_validator('mount_path') + @classmethod + def validate_mount_path(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('mount_path must be an absolute path inside the sandbox') + return value + + +class BoxSpec(pydantic.BaseModel): + cmd: str = '' + workdir: str = DEFAULT_BOX_MOUNT_PATH + timeout_sec: int = 30 + network: BoxNetworkMode = BoxNetworkMode.OFF + session_id: str + env: dict[str, str] = pydantic.Field(default_factory=dict) + image: str = DEFAULT_BOX_IMAGE + host_path: str | None = None + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + mount_path: str = DEFAULT_BOX_MOUNT_PATH + extra_mounts: list[BoxMountSpec] = pydantic.Field(default_factory=list) + persistent: bool = 
False + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + workspace_quota_mb: int = 0 + + @pydantic.model_validator(mode='before') + @classmethod + def populate_workdir_from_mount_path(cls, data): + if not isinstance(data, dict): + return data + if data.get('workdir') not in (None, ''): + return data + mount_path = data.get('mount_path') + if isinstance(mount_path, str) and mount_path.strip(): + data = dict(data) + data['workdir'] = mount_path + return data + + @pydantic.field_validator('cmd') + @classmethod + def validate_cmd(cls, value: str) -> str: + return value.strip() + + @pydantic.field_validator('workdir') + @classmethod + def validate_workdir(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('workdir must be an absolute path inside the sandbox') + return value + + @pydantic.field_validator('timeout_sec') + @classmethod + def validate_timeout_sec(cls, value: int) -> int: + if value <= 0: + raise ValueError('timeout_sec must be greater than 0') + return value + + @pydantic.field_validator('cpus') + @classmethod + def validate_cpus(cls, value: float) -> float: + if value <= 0: + raise ValueError('cpus must be greater than 0') + return value + + @pydantic.field_validator('memory_mb') + @classmethod + def validate_memory_mb(cls, value: int) -> int: + if value < 32: + raise ValueError('memory_mb must be at least 32') + return value + + @pydantic.field_validator('pids_limit') + @classmethod + def validate_pids_limit(cls, value: int) -> int: + if value < 1: + raise ValueError('pids_limit must be at least 1') + return value + + @pydantic.field_validator('workspace_quota_mb') + @classmethod + def validate_workspace_quota_mb(cls, value: int) -> int: + if value < 0: + raise ValueError('workspace_quota_mb must be greater than or equal to 0') + return value + + @pydantic.field_validator('session_id') + @classmethod + def validate_session_id(cls, value: 
str) -> str: + value = value.strip() + if not value: + raise ValueError('session_id must not be empty') + return value + + @pydantic.field_validator('env') + @classmethod + def validate_env(cls, value: dict[str, str]) -> dict[str, str]: + return {str(k): str(v) for k, v in value.items()} + + @pydantic.field_validator('host_path') + @classmethod + def validate_host_path(cls, value: str | None) -> str | None: + if value is None: + return None + value = value.strip() + if not (posixpath.isabs(value) or ntpath.isabs(value)): + raise ValueError('host_path must be an absolute host path') + return value + + @pydantic.field_validator('mount_path') + @classmethod + def validate_mount_path(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('mount_path must be an absolute path inside the sandbox') + return value + + @pydantic.model_validator(mode='after') + def validate_host_mount_consistency(self) -> 'BoxSpec': + if self.host_path is None: + return self + if self.host_path_mode == BoxHostMountMode.NONE: + return self + if self.workdir != self.mount_path and not self.workdir.startswith(f'{self.mount_path}/'): + raise ValueError('workdir must stay under mount_path when host_path is provided') + return self + + +class BoxProfile(pydantic.BaseModel): + """Preset sandbox configuration. + + Provides default values for BoxSpec fields and optionally locks fields + so that tool-call parameters cannot override them. 
+ """ + + name: str + image: str = DEFAULT_BOX_IMAGE + network: BoxNetworkMode = BoxNetworkMode.OFF + timeout_sec: int = 30 + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + max_timeout_sec: int = 120 + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + workspace_quota_mb: int = 0 + locked: frozenset[str] = frozenset() + + model_config = pydantic.ConfigDict(frozen=True) + + +BUILTIN_PROFILES: dict[str, BoxProfile] = { + 'default': BoxProfile( + name='default', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'offline_readonly': BoxProfile( + name='offline_readonly', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_ONLY, + cpus=0.5, + memory_mb=256, + pids_limit=64, + read_only_rootfs=True, + max_timeout_sec=60, + locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}), + ), + 'network_basic': BoxProfile( + name='network_basic', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'network_extended': BoxProfile( + name='network_extended', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=2.0, + memory_mb=1024, + pids_limit=256, + read_only_rootfs=False, + max_timeout_sec=300, + ), +} + + +class BoxSessionInfo(pydantic.BaseModel): + session_id: str + backend_name: str + backend_session_id: str + image: str + network: BoxNetworkMode + host_path: str | None = None + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + mount_path: str = DEFAULT_BOX_MOUNT_PATH + persistent: bool = False + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + workspace_quota_mb: int = 0 + created_at: dt.datetime + last_used_at: 
dt.datetime + + +class BoxManagedProcessSpec(pydantic.BaseModel): + process_id: str = 'default' + command: str + args: list[str] = pydantic.Field(default_factory=list) + env: dict[str, str] = pydantic.Field(default_factory=dict) + cwd: str = DEFAULT_BOX_MOUNT_PATH + + @pydantic.field_validator('command') + @classmethod + def validate_command(cls, value: str) -> str: + value = value.strip() + if not value: + raise ValueError('command must not be empty') + return value + + @pydantic.field_validator('args') + @classmethod + def validate_args(cls, value: list[str]) -> list[str]: + return [str(item) for item in value] + + @pydantic.field_validator('env') + @classmethod + def validate_env(cls, value: dict[str, str]) -> dict[str, str]: + return {str(k): str(v) for k, v in value.items()} + + @pydantic.field_validator('cwd') + @classmethod + def validate_cwd(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('cwd must be an absolute path inside the sandbox') + return value + + +class BoxManagedProcessInfo(pydantic.BaseModel): + session_id: str + process_id: str = 'default' + status: BoxManagedProcessStatus + command: str + args: list[str] + cwd: str + env_keys: list[str] + attached: bool = False + started_at: dt.datetime + exited_at: dt.datetime | None = None + exit_code: int | None = None + stderr_preview: str = '' + + +class BoxExecutionResult(pydantic.BaseModel): + session_id: str + backend_name: str + status: BoxExecutionStatus + exit_code: int | None + stdout: str = '' + stderr: str = '' + duration_ms: int + + @property + def ok(self) -> bool: + return self.status == BoxExecutionStatus.COMPLETED and self.exit_code == 0 diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py new file mode 100644 index 0000000..e8eced6 --- /dev/null +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -0,0 +1,552 @@ +from __future__ import annotations + +import asyncio +import datetime as dt +import 
json +import logging +import os +import pathlib +import shlex +import shutil +import signal +import uuid + +from .backend import BaseSandboxBackend, _CommandResult, _MAX_RAW_OUTPUT_BYTES +from .errors import BoxError +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from .security import validate_sandbox_security + +# System directories to mount read-only inside the sandbox. +# Only well-known paths needed for running Python/Node/shell commands. +_READONLY_SYSTEM_MOUNTS: list[str] = [ + '/usr', + '/lib', + '/lib64', + '/bin', + '/sbin', +] + +# Specific /etc entries required for dynamic linking and TLS. +_READONLY_ETC_ENTRIES: list[str] = [ + '/etc/alternatives', + '/etc/ld.so.cache', + '/etc/ld.so.conf', + '/etc/ld.so.conf.d', + '/etc/ssl/certs', + '/etc/localtime', + '/etc/resolv.conf', # needed when network=ON +] + +_DEFAULT_BASE_DIR = '/tmp/langbot-box-nsjail' + + +class NsjailBackend(BaseSandboxBackend): + """Lightweight sandbox backend using nsjail. + + Each ``exec`` invocation spawns an independent nsjail process. Session + state (workspace files) persists via a shared host directory that is + bind-mounted into every invocation. + """ + + name = 'nsjail' + + def __init__( + self, + logger: logging.Logger, + nsjail_bin: str = 'nsjail', + base_dir: str = _DEFAULT_BASE_DIR, + ): + super().__init__(logger) + self._nsjail_bin = nsjail_bin + self._base_dir = pathlib.Path(base_dir) + self._cgroup_v2_available: bool = False + + # ── lifecycle ───────────────────────────────────────────────────── + + async def is_available(self) -> bool: + if shutil.which(self._nsjail_bin) is None: + self.logger.info('nsjail binary not found in PATH') + return False + + # Quick sanity check – nsjail --help exits 0. 
+ try: + proc = await asyncio.create_subprocess_exec( + self._nsjail_bin, '--help', + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + await asyncio.wait_for(proc.wait(), timeout=5) + if proc.returncode != 0: + self.logger.info('nsjail --help returned non-zero') + return False + except Exception as exc: + self.logger.info(f'nsjail probe failed: {exc}') + return False + + self._cgroup_v2_available = self._detect_cgroup_v2() + if not self._cgroup_v2_available: + self.logger.warning( + 'cgroup v2 not available for nsjail; ' + 'falling back to rlimit-based resource limits' + ) + + self._base_dir.mkdir(parents=True, exist_ok=True) + return True + + async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: + validate_sandbox_security(spec) + + now = dt.datetime.now(dt.timezone.utc) + session_dir_name = f'{self.instance_id}_{spec.session_id}_{uuid.uuid4().hex[:8]}' + session_dir = self._base_dir / session_dir_name + + # Per-session writable directories. + root_dir = session_dir / 'root' + workspace_dir = session_dir / 'workspace' + tmp_dir = session_dir / 'tmp' + home_dir = session_dir / 'home' + + for d in (root_dir, workspace_dir, tmp_dir, home_dir): + d.mkdir(parents=True, exist_ok=True) + + # If host_path is specified, we will use it directly instead of the + # per-session workspace when building nsjail args (see _build_mounts). 
+ meta = { + 'session_id': spec.session_id, + 'instance_id': self.instance_id, + 'host_path': spec.host_path, + 'host_path_mode': spec.host_path_mode.value if spec.host_path else None, + 'mount_path': spec.mount_path, + 'network': spec.network.value, + 'cpus': spec.cpus, + 'memory_mb': spec.memory_mb, + 'pids_limit': spec.pids_limit, + 'created_at': now.isoformat(), + } + (session_dir / 'meta.json').write_text(json.dumps(meta, indent=2)) + + self.logger.info( + f'LangBot Box backend start_session: backend=nsjail ' + f'session_id={spec.session_id} session_dir={session_dir} ' + f'network={spec.network.value} ' + f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} ' + f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' + f'workspace_quota_mb={spec.workspace_quota_mb}' + ) + + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=str(session_dir), + # Keep the requested logical image in metadata so runtime session + # reuse sees later specs as compatible. nsjail still executes + # against host-mounted system paths rather than a container image. + image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, + persistent=spec.persistent, + created_at=now, + last_used_at=now, + ) + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + start = dt.datetime.now(dt.timezone.utc) + session_dir = pathlib.Path(session.backend_session_id) + + args = self._build_nsjail_args(session, spec, session_dir) + + cmd_preview = spec.cmd.strip() + if len(cmd_preview) > 400: + cmd_preview = f'{cmd_preview[:397]}...' 
+ self.logger.info( + f'LangBot Box backend exec: backend=nsjail ' + f'session_id={session.session_id} session_dir={session_dir} ' + f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} ' + f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}' + ) + + result = await self._run_nsjail(args, timeout_sec=spec.timeout_sec) + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) + + if result.timed_out: + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.TIMED_OUT, + exit_code=None, + stdout=result.stdout, + stderr=result.stderr or f'Command timed out after {spec.timeout_sec} seconds.', + duration_ms=duration_ms, + ) + + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=result.return_code, + stdout=result.stdout, + stderr=result.stderr, + duration_ms=duration_ms, + ) + + async def stop_session(self, session: BoxSessionInfo): + session_dir = pathlib.Path(session.backend_session_id) + self.logger.info( + f'LangBot Box backend stop_session: backend=nsjail ' + f'session_id={session.session_id} session_dir={session_dir}' + ) + + # Kill any lingering nsjail processes whose cwd is inside session_dir. + await self._kill_session_processes(session_dir) + + try: + if session_dir.exists(): + shutil.rmtree(session_dir) + except Exception as exc: + self.logger.warning(f'Failed to remove nsjail session dir {session_dir}: {exc}') + + async def start_managed_process( + self, session: BoxSessionInfo, spec + ) -> asyncio.subprocess.Process: + session_dir = pathlib.Path(session.backend_session_id) + + # Build a BoxSpec-like object so we can reuse _build_nsjail_args. + # ManagedProcessSpec has command/args/cwd/env but not the full BoxSpec. 
+        inner_cmd = ' '.join([shlex.quote(spec.command), *[shlex.quote(a) for a in spec.args]])
+        pseudo_spec = BoxSpec(
+            cmd=inner_cmd,
+            workdir=spec.cwd,
+            timeout_sec=86400,  # not used here
+            network=session.network,
+            session_id=session.session_id,
+            env=spec.env,
+            host_path=session.host_path,
+            host_path_mode=session.host_path_mode,
+            mount_path=session.mount_path,
+            cpus=session.cpus,
+            memory_mb=session.memory_mb,
+            pids_limit=session.pids_limit,
+            read_only_rootfs=session.read_only_rootfs,
+        )
+
+        args = self._build_nsjail_args(session, pseudo_spec, session_dir)
+
+        self.logger.info(
+            f'LangBot Box backend start_managed_process: backend=nsjail '
+            f'session_id={session.session_id} session_dir={session_dir} '
+            f'cwd={spec.cwd} env_keys={sorted(spec.env.keys())} '
+            f'command={spec.command} args={spec.args}'
+        )
+
+        return await asyncio.create_subprocess_exec(
+            *args,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+    async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
+        """Remove session directories that do not belong to ``current_instance_id``.
+
+        Every directory directly under the base dir whose name does not start
+        with ``{current_instance_id}_`` is treated as orphaned: its nsjail
+        processes are killed, then the directory is deleted. Failures are
+        logged as warnings and do not stop the sweep. With the default empty
+        id the kept prefix is just ``_``.
+        """
+        if not self._base_dir.exists():
+            return
+
+        for entry in self._base_dir.iterdir():
+            if not entry.is_dir():
+                continue
+
+            # Session dirs are named: {instance_id}_{session_id}_{8 hex chars}
+            # (see start_session). If it doesn't start with the current
+            # instance_id, it's orphaned.
+            if entry.name.startswith(f'{current_instance_id}_'):
+                continue
+
+            self.logger.info(f'Cleaning up orphaned nsjail session dir: {entry}')
+            try:
+                await self._kill_session_processes(entry)
+                shutil.rmtree(entry)
+            except Exception as exc:
+                self.logger.warning(f'Failed to clean up orphaned nsjail dir {entry}: {exc}')
+
+    # ── nsjail argument construction ──────────────────────────────────
+
+    def _build_nsjail_args(
+        self,
+        session: BoxSessionInfo,
+        spec: BoxSpec,
+        session_dir: pathlib.Path,
+    ) -> list[str]:
+        """Build the nsjail argv that runs ``spec.cmd`` for this session."""
+        args: list[str] = [self._nsjail_bin]
+
+        # Mode: one-shot execution.
+        args.extend(['--mode', 'o'])
+
+        # nsjail enables the relevant clone namespaces by default.
Some + # versions do not expose positive --clone_new* flags, only disable + # flags, so rely on defaults for broad compatibility. + + # Use a per-session chroot root so nsjail can create mount targets + # without needing write access to the host root. + root_dir = session_dir / 'root' + root_dir.mkdir(parents=True, exist_ok=True) + self._ensure_chroot_mount_targets(root_dir, session, spec) + args.extend(['--chroot', str(root_dir)]) + + # Network namespace. + if spec.network != BoxNetworkMode.OFF: + args.append('--disable_clone_newnet') + + # Read-only system mounts. + args.extend(self._build_readonly_mounts(spec.network)) + + # Writable per-session mounts. + args.extend(self._build_writable_mounts(session, spec, session_dir)) + + # Isolated /proc and minimal /dev. + args.extend(['--mount', 'none:/proc:proc:rw']) + args.extend(['--mount', 'none:/dev:tmpfs:rw']) + + # Working directory. + args.extend(['--cwd', spec.workdir]) + + # Environment variables. + args.extend(['--env', 'PYTHONUNBUFFERED=1']) + args.extend(['--env', 'HOME=/home']) + args.extend(['--env', 'PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin']) + for key, value in spec.env.items(): + args.extend(['--env', f'{key}={value}']) + + # Resource limits. + args.extend(self._build_resource_limits(spec)) + + # Suppress nsjail's own log output. + args.append('--really_quiet') + + # The actual command. + quoted_workdir = shlex.quote(spec.workdir) + user_cmd = f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {spec.cmd}' + args.extend(['--', '/bin/sh', '-lc', user_cmd]) + + return args + + def _build_readonly_mounts(self, network: BoxNetworkMode) -> list[str]: + args: list[str] = [] + + for path in _READONLY_SYSTEM_MOUNTS: + if os.path.exists(path): + args.extend(['--bindmount_ro', f'{path}:{path}']) + + for path in _READONLY_ETC_ENTRIES: + # /etc/resolv.conf is only needed when network is ON. 
+ if path == '/etc/resolv.conf' and network == BoxNetworkMode.OFF: + continue + if os.path.exists(path): + args.extend(['--bindmount_ro', f'{path}:{path}']) + + return args + + def _build_writable_mounts( + self, + session: BoxSessionInfo, + spec: BoxSpec, + session_dir: pathlib.Path, + ) -> list[str]: + args: list[str] = [] + + # Workspace mount. + if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + if spec.host_path_mode == BoxHostMountMode.READ_ONLY: + args.extend(['--bindmount_ro', f'{spec.host_path}:{spec.mount_path}']) + else: + args.extend(['--bindmount', f'{spec.host_path}:{spec.mount_path}']) + else: + workspace_dir = session_dir / 'workspace' + args.extend(['--bindmount', f'{workspace_dir}:{spec.mount_path}']) + + for mount in spec.extra_mounts: + if mount.mode == BoxHostMountMode.READ_ONLY: + args.extend(['--bindmount_ro', f'{mount.host_path}:{mount.mount_path}']) + elif mount.mode == BoxHostMountMode.READ_WRITE: + args.extend(['--bindmount', f'{mount.host_path}:{mount.mount_path}']) + + # /tmp and /home are always per-session writable. 
+ tmp_dir = session_dir / 'tmp' + home_dir = session_dir / 'home' + args.extend(['--bindmount', f'{tmp_dir}:/tmp']) + args.extend(['--bindmount', f'{home_dir}:/home']) + + return args + + def _ensure_chroot_mount_targets( + self, + root_dir: pathlib.Path, + session: BoxSessionInfo, + spec: BoxSpec, + ) -> None: + mount_paths = { + '/proc', + '/dev', + '/tmp', + '/home', + spec.mount_path, + session.mount_path, + } + mount_paths.update(_READONLY_SYSTEM_MOUNTS) + mount_paths.update(_READONLY_ETC_ENTRIES) + for mount in spec.extra_mounts: + mount_paths.add(mount.mount_path) + + for mount_path in mount_paths: + if not mount_path: + continue + target = root_dir / mount_path.lstrip('/') + try: + if os.path.isfile(mount_path): + target.parent.mkdir(parents=True, exist_ok=True) + target.touch(exist_ok=True) + else: + target.mkdir(parents=True, exist_ok=True) + except Exception as exc: + self.logger.debug(f'Failed to prepare nsjail mount target {target}: {exc}') + + def _build_resource_limits(self, spec: BoxSpec) -> list[str]: + args: list[str] = [] + + if self._cgroup_v2_available: + # cgroup v2 – precise limits. + memory_bytes = spec.memory_mb * 1024 * 1024 + args.extend(['--cgroup_mem_max', str(memory_bytes)]) + args.extend(['--cgroup_pids_max', str(spec.pids_limit)]) + cpu_ms = int(spec.cpus * 1000) + args.extend(['--cgroup_cpu_ms_per_sec', str(cpu_ms)]) + else: + # rlimit fallback – best-effort. + args.extend(['--rlimit_as', str(spec.memory_mb)]) + args.extend(['--rlimit_nproc', str(spec.pids_limit)]) + + # Always set these rlimits regardless of cgroup mode. 
+        args.extend(['--rlimit_fsize', '512'])  # max file size 512 MB
+        args.extend(['--rlimit_nofile', '256'])  # max open fds
+
+        return args
+
+    # ── process execution ─────────────────────────────────────────────
+
+    async def _run_nsjail(
+        self,
+        args: list[str],
+        timeout_sec: int,
+    ) -> _CommandResult:
+        """Run one nsjail invocation and capture its output.
+
+        stdout and stderr are drained concurrently while waiting for the
+        process. If it does not exit within ``timeout_sec`` seconds it is
+        killed and the result is flagged ``timed_out`` with return code -1.
+        """
+        process = await asyncio.create_subprocess_exec(
+            *args,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout_task = asyncio.create_task(self._read_stream(process.stdout))
+        stderr_task = asyncio.create_task(self._read_stream(process.stderr))
+
+        timed_out = False
+        try:
+            await asyncio.wait_for(process.wait(), timeout=timeout_sec)
+        except asyncio.TimeoutError:
+            timed_out = True
+            try:
+                process.kill()
+            except ProcessLookupError:
+                # The process exited between the timeout firing and the kill;
+                # nothing is left to signal, so carry on and collect output.
+                pass
+            await process.wait()
+
+        stdout_bytes, stdout_total = await stdout_task
+        stderr_bytes, stderr_total = await stderr_task
+
+        return _CommandResult(
+            return_code=process.returncode if not timed_out else -1,
+            stdout=self._clip_captured_bytes(stdout_bytes, stdout_total),
+            stderr=self._clip_captured_bytes(stderr_bytes, stderr_total),
+            timed_out=timed_out,
+        )
+
+    # ── helpers ───────────────────────────────────────────────────────
+
+    @staticmethod
+    def _detect_cgroup_v2() -> bool:
+        """Check whether the host runs cgroup v2 and we can write to it."""
+        cgroup_mount = pathlib.Path('/sys/fs/cgroup')
+        if not cgroup_mount.exists():
+            return False
+        # cgroup v2 has a single hierarchy with cgroup.controllers file.
+        controllers = cgroup_mount / 'cgroup.controllers'
+        if not controllers.exists():
+            return False
+        # Check if we can write to a cgroup subtree (needed for nsjail).
+        # A rough heuristic: if the user owns a cgroup directory we're probably
+        # running under systemd user delegation.
+        user_slice = cgroup_mount / f'user.slice/user-{os.getuid()}.slice'
+        if user_slice.exists() and os.access(user_slice, os.W_OK):
+            return True
+        # If running as root (uid 0), cgroup v2 is always usable.
async def _kill_session_processes(self, session_dir: pathlib.Path) -> None:
    """Best-effort kill of nsjail processes associated with a session dir.

    We scan /proc for nsjail processes whose command line references the
    session directory. The directory must be followed by a path boundary
    (``/``, ``:``, an argv separator or the end of the command line) so that
    a session stored in ``.../s1`` never matches the jail of ``.../s10``.

    Processes that vanish or cannot be inspected or signalled are skipped.
    """
    session_path_str = str(session_dir)
    proc_dir = pathlib.Path('/proc')
    if not proc_dir.exists():
        return

    def _references_session(cmdline: str) -> bool:
        """Return True when cmdline names the session dir as a whole path."""
        start = cmdline.find(session_path_str)
        while start != -1:
            end = start + len(session_path_str)
            # '\x00' separates argv entries in /proc/<pid>/cmdline; ':' is the
            # source/destination separator of nsjail bind-mount arguments.
            if end == len(cmdline) or cmdline[end] in '/:\x00':
                return True
            start = cmdline.find(session_path_str, start + 1)
        return False

    for pid_dir in proc_dir.iterdir():
        if not pid_dir.name.isdigit():
            continue
        try:
            cmdline = (pid_dir / 'cmdline').read_bytes().decode('utf-8', errors='replace')
            if self._nsjail_bin in cmdline and _references_session(cmdline):
                pid = int(pid_dir.name)
                os.kill(pid, signal.SIGKILL)
                self.logger.info(f'Killed orphaned nsjail process {pid}')
        except (OSError, ValueError):
            continue
@staticmethod
async def _read_stream(
    stream: asyncio.StreamReader | None,
    limit: int = _MAX_RAW_OUTPUT_BYTES,
) -> tuple[bytes, int]:
    """Drain ``stream`` to EOF, keeping at most ``limit`` bytes.

    Returns ``(captured, total)`` where ``captured`` holds the first
    ``limit`` bytes and ``total`` counts every byte read. A missing stream
    yields ``(b'', 0)``.
    """
    if stream is None:
        return b'', 0

    captured = bytearray()
    seen = 0
    # Keep reading past the limit so the total stays accurate and the pipe
    # is fully consumed.
    while data := await stream.read(65536):
        seen += len(data)
        room = limit - len(captured)
        if room > 0:
            captured += data[:room]

    return bytes(captured), seen
def __init__(
    self,
    logger: logging.Logger,
    backends: list[BaseSandboxBackend] | None = None,
    session_ttl_sec: int = 300,
):
    """Create a runtime with no backend selected and no sessions.

    Args:
        logger: Logger used by the runtime and handed to default backends.
        backends: Candidate backends. When ``None`` the Docker, nsjail and
            E2B backends are created; the E2B entry is ``None`` when its
            package cannot be imported.
        session_ttl_sec: Session time-to-live in seconds.
    """
    self.logger = logger

    # Load configuration from environment variable (passed by LangBot)
    self._box_config: dict = {}
    config_json = os.getenv("LANGBOT_BOX_CONFIG", "")
    if config_json:
        try:
            parsed = json.loads(config_json)
        except json.JSONDecodeError:
            logger.warning(
                f"Failed to parse LANGBOT_BOX_CONFIG: {config_json[:100]}"
            )
        else:
            # Valid JSON is not necessarily an object; the rest of the
            # runtime calls .get()/.update() on the config, so anything
            # else is rejected here instead of crashing later.
            if isinstance(parsed, dict):
                self._box_config = parsed
            else:
                logger.warning(
                    "LANGBOT_BOX_CONFIG must be a JSON object, "
                    f"got {type(parsed).__name__}; ignoring it"
                )

    # Build backend list
    if backends is None:
        backends = [
            DockerBackend(logger),
            NsjailBackend(logger),
            self._create_e2b_backend(logger),
        ]

    self.backends = backends
    self.session_ttl_sec = session_ttl_sec
    self._backend: BaseSandboxBackend | None = None
    self._sessions: dict[str, _RuntimeSession] = {}
    self._lock = asyncio.Lock()
    self.instance_id = uuid.uuid4().hex[:12]
    self.skill_store = BoxSkillStore(self._box_config)
init(self, config: dict) -> None: + """Initialize with full box configuration from LangBot. + + Called via RPC (INIT action) when connecting over WebSocket. + """ + self._box_config.update(config) + self._apply_config_to_backends(config) + self.skill_store.update_config(self._box_config) + if not self._sessions: + self._backend = None + + def _apply_config_to_backends(self, config: dict) -> None: + """Apply configuration sections to corresponding backends.""" + for backend in self.backends: + if backend is None: + continue + backend_config = config.get(backend.name, {}) + if backend_config and hasattr(backend, "configure"): + backend.configure(backend_config) + + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: + if not spec.cmd: + raise BoxValidationError("cmd must not be empty") + session = await self._get_or_create_session(spec) + + async with session.lock: + self.logger.info( + "LangBot Box execute: " + f"session_id={spec.session_id} " + f"backend_session_id={session.info.backend_session_id} " + f"backend={session.info.backend_name} " + f"workdir={spec.workdir} " + f"timeout_sec={spec.timeout_sec}" + ) + result = await (await self._get_backend()).exec(session.info, spec) + + async with self._lock: + now = dt.datetime.now(_UTC) + if spec.session_id in self._sessions: + self._sessions[spec.session_id].info.last_used_at = now + + if result.status == BoxExecutionStatus.TIMED_OUT: + await self._drop_session_locked(spec.session_id) + + return result + + async def shutdown(self): + async with self._lock: + session_ids = list(self._sessions.keys()) + for session_id in session_ids: + session = self._sessions.get(session_id) + if session is not None and session.info.persistent: + continue + await self._drop_session_locked(session_id) + + async def create_session(self, spec: BoxSpec) -> dict: + session = await self._get_or_create_session(spec) + return self._session_to_dict(session.info) + + async def delete_session(self, session_id: str) -> None: + async 
async def start_managed_process(
    self, session_id: str, spec: BoxManagedProcessSpec
) -> dict:
    """Start (or restart) a managed process inside an existing session.

    Args:
        session_id: Identifier of a session already known to the runtime.
        spec: Description of the process; ``spec.process_id`` is its key
            within the session.

    Returns:
        The managed-process description built by
        ``_managed_process_to_dict``.

    Raises:
        BoxSessionNotFoundError: If ``session_id`` is unknown.
    """
    async with self._lock:
        runtime_session = self._sessions.get(session_id)
        if runtime_session is None:
            raise BoxSessionNotFoundError(f"session {session_id} not found")

    async with runtime_session.lock:
        process_id = spec.process_id
        existing = runtime_session.managed_processes.get(process_id)
        if existing is not None and existing.is_running:
            # Terminate the stale process before starting a new one.
            # This happens when LangBot restarts while the Box runtime
            # keeps the persistent session alive.
            self.logger.info(
                f"LangBot Box terminating stale managed process before restart: "
                f"session_id={session_id} process_id={process_id}"
            )
            await self._terminate_managed_process(existing)
            del runtime_session.managed_processes[process_id]

        backend = await self._get_backend()
        process = await backend.start_managed_process(runtime_session.info, spec)
        managed_process = _ManagedProcess(
            spec=spec,
            process=process,
            started_at=dt.datetime.now(_UTC),
            attach_lock=asyncio.Lock(),
            stderr_chunks=collections.deque(),
        )
        runtime_session.managed_processes[process_id] = managed_process
        runtime_session.info.last_used_at = dt.datetime.now(_UTC)

        # The event loop only keeps weak references to tasks, so hold strong
        # ones until each helper finishes; otherwise the stderr drainer or
        # the exit watcher could be garbage-collected mid-flight.
        background_tasks = getattr(self, "_background_tasks", None)
        if background_tasks is None:
            background_tasks = self._background_tasks = set()
        for helper in (
            self._drain_managed_process_stderr(
                runtime_session.info.session_id, process_id, managed_process
            ),
            self._watch_managed_process(
                runtime_session.info.session_id, process_id, managed_process
            ),
        ):
            task = asyncio.create_task(helper)
            background_tasks.add(task)
            task.add_done_callback(background_tasks.discard)

        return self._managed_process_to_dict(
            runtime_session.info.session_id, process_id, managed_process
        )
async def get_backend_info(self) -> dict:
    """Report the selected backend as ``{"name": ..., "available": ...}``.

    A backend is selected first when none is cached. With no backend the
    result is ``{"name": None, "available": False}``; an availability probe
    that raises is reported as unavailable.
    """
    active = self._backend
    if active is None:
        active = self._backend = await self._select_backend()
        if active is None:
            return {"name": None, "available": False}

    try:
        is_ready = await active.is_available()
    except Exception:
        is_ready = False

    return {"name": active.name, "available": is_ready}
async def get_status(self) -> dict:
    """Summarise the runtime: backend info, session count, running processes, TTL."""
    info = await self.get_backend_info()

    # Only processes that are still running are counted.
    running = 0
    for entry in self._sessions.values():
        for proc in entry.managed_processes.values():
            if proc.is_running:
                running += 1

    summary = {
        "backend": info,
        "active_sessions": len(self._sessions),
        "managed_processes": running,
        "session_ttl_sec": self.session_ttl_sec,
    }
    return summary
f"image={info.image} " + f"network={info.network.value} " + f"host_path={info.host_path} " + f"host_path_mode={info.host_path_mode.value} " + f"mount_path={info.mount_path} " + f"workspace_quota_mb={info.workspace_quota_mb}" + ) + return runtime_session + + async def _get_backend(self) -> BaseSandboxBackend: + if self._backend is None: + self._backend = await self._select_backend() + if self._backend is None: + raise BoxBackendUnavailableError( + "LangBot Box backend unavailable. Install and start Docker or nsjail before using exec." + ) + return self._backend + + # Backends grouped under each top-level box.backend choice. + # 'local' picks the first available local container backend (docker → nsjail). + _LOCAL_BACKEND_NAMES = ("docker", "nsjail") + + async def _select_backend(self) -> BaseSandboxBackend | None: + # Backend selection comes from box.backend only. + # Accepted values: 'local', 'docker', 'nsjail', 'e2b'. 'local' fans out + # to local container backends; everything else must match one backend exactly. 
+ forced = (self._box_config.get("backend") or "").strip() + source_label = "box.backend" + + candidates: list[BaseSandboxBackend] + if forced == "local": + candidates = [ + b + for b in self.backends + if b is not None and b.name in self._LOCAL_BACKEND_NAMES + ] + if not candidates: + self.logger.error( + f"LangBot Box: no local backend registered " + f"({source_label}={forced})" + ) + return None + elif forced: + candidates = [ + b for b in self.backends if b is not None and b.name == forced + ] + if not candidates: + available_names = [b.name for b in self.backends if b is not None] + self.logger.error( + f'LangBot Box backend "{forced}" not found ' + f"({source_label}={forced}, available: {available_names})" + ) + return None + else: + candidates = [b for b in self.backends if b is not None] + + for backend in candidates: + try: + await backend.initialize() + if await backend.is_available(): + label = ( + f"{backend.name} (forced via {source_label}={forced})" + if forced + else backend.name + ) + self.logger.info(f"LangBot Box using backend: {label}") + return backend + except Exception as exc: + self.logger.warning( + f"LangBot Box backend {backend.name} probe failed: {exc}" + ) + + if forced: + self.logger.error( + f'LangBot Box backend "{forced}" probed but not available ' + f"({source_label}={forced})" + ) + + self.logger.warning( + "LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready" + ) + return None + + async def _reap_expired_sessions_locked(self): + if self.session_ttl_sec <= 0: + return + + deadline = dt.datetime.now(_UTC) - dt.timedelta(seconds=self.session_ttl_sec) + expired_session_ids = [ + session_id + for session_id, session in self._sessions.items() + if not session.info.persistent + and session.info.last_used_at < deadline + and not any(mp.is_running for mp in session.managed_processes.values()) + ] + + for session_id in expired_session_ids: + await self._drop_session_locked(session_id) + + async def 
_drop_session_locked(self, session_id: str): + runtime_session = self._sessions.pop(session_id, None) + if runtime_session is None or self._backend is None: + return + + for mp in runtime_session.managed_processes.values(): + await self._terminate_managed_process(mp) + + try: + self.logger.info( + "LangBot Box session cleanup: " + f"session_id={session_id} " + f"backend_session_id={runtime_session.info.backend_session_id} " + f"backend={runtime_session.info.backend_name}" + ) + await self._backend.stop_session(runtime_session.info) + except Exception as exc: + self.logger.warning(f"Failed to clean up box session {session_id}: {exc}") + + def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): + _COMPAT_FIELDS = ( + "network", + "image", + "host_path", + "host_path_mode", + "mount_path", + "persistent", + "cpus", + "memory_mb", + "pids_limit", + "read_only_rootfs", + "workspace_quota_mb", + ) + for field in _COMPAT_FIELDS: + session_val = getattr(session, field) + spec_val = getattr(spec, field) + if session_val != spec_val: + display = ( + session_val.value if hasattr(session_val, "value") else session_val + ) + raise BoxSessionConflictError( + f"Box session {spec.session_id} already exists with {field}={display}" + ) + + async def _drain_managed_process_stderr( + self, session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> None: + stream = managed_process.process.stderr + if stream is None: + return + + try: + while True: + chunk = await stream.readline() + if not chunk: + break + text = chunk.decode("utf-8", errors="replace").rstrip() + if not text: + continue + managed_process.stderr_chunks.append(text) + managed_process.stderr_total_len += ( + len(text) + 1 + ) # +1 for '\n' separator + while ( + managed_process.stderr_total_len + > _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT + and managed_process.stderr_chunks + ): + removed = managed_process.stderr_chunks.popleft() + managed_process.stderr_total_len -= len(removed) + 1 + 
self.logger.info( + f"LangBot Box managed process stderr: session_id={session_id} process_id={process_id} {text}" + ) + except Exception as exc: + self.logger.warning( + f"Failed to drain managed process stderr for {session_id}/{process_id}: {exc}" + ) + + async def _watch_managed_process( + self, session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> None: + return_code = await managed_process.process.wait() + managed_process.exit_code = return_code + managed_process.exited_at = dt.datetime.now(_UTC) + runtime_session = self._sessions.get(session_id) + if runtime_session is not None: + runtime_session.info.last_used_at = managed_process.exited_at + self.logger.info( + f"LangBot Box managed process exited: session_id={session_id} process_id={process_id} return_code={return_code}" + ) + + async def _terminate_managed_process( + self, managed_process: _ManagedProcess + ) -> None: + if not managed_process.is_running: + return + + process = managed_process.process + try: + if process.stdin is not None: + process.stdin.close() + except Exception: + pass + + try: + if process.returncode is None: + try: + process.terminate() + except ProcessLookupError: + pass + await asyncio.wait_for(asyncio.shield(process.wait()), timeout=5) + except asyncio.TimeoutError: + if process.returncode is None: + try: + process.kill() + except ProcessLookupError: + pass + await process.wait() + finally: + managed_process.exit_code = process.returncode + managed_process.exited_at = dt.datetime.now(_UTC) + + def _managed_process_to_dict( + self, session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> dict: + stderr_preview = "\n".join(managed_process.stderr_chunks) + status = ( + BoxManagedProcessStatus.RUNNING + if managed_process.is_running + else BoxManagedProcessStatus.EXITED + ) + return BoxManagedProcessInfo( + session_id=session_id, + process_id=process_id, + status=status, + command=managed_process.spec.command, + args=managed_process.spec.args, + 
def validate_sandbox_security(spec: BoxSpec) -> None:
    """Validate that a BoxSpec does not request dangerous container config.

    The resolved ``host_path`` is rejected when it is a blocked path, lies
    inside one, or contains one (for example ``/`` or ``/var``): mounting an
    ancestor exposes the blocked location just the same.

    Raises BoxValidationError when the spec contains a blocked host_path.
    """
    if not spec.host_path:
        return

    real = os.path.normcase(os.path.realpath(spec.host_path))

    def _covers(parent: str, child: str) -> bool:
        """Return True when child equals parent or lies beneath it."""
        # rstrip() keeps a filesystem root ('/' or 'C:\\') from producing a
        # doubled separator that no path would start with.
        return child == parent or child.startswith(parent.rstrip(os.sep) + os.sep)

    for blocked in BLOCKED_HOST_PATHS:
        blocked_norm = os.path.normcase(blocked)
        if _covers(blocked_norm, real) or _covers(real, blocked_norm):
            raise BoxValidationError(f'host_path {spec.host_path} is blocked for security')
class AiohttpWSConnection(Connection):
    """Adapt an aiohttp ``WebSocketResponse`` to the SDK ``Connection`` interface.

    This allows ``BoxServerHandler`` (and therefore ``Handler``) to work over
    an aiohttp WebSocket without any changes to the handler/IO layer.
    """

    def __init__(self, ws: web.WebSocketResponse) -> None:
        self._ws = ws
        # Serialises writers so concurrent send() calls never interleave.
        self._send_lock = asyncio.Lock()

    async def send(self, message: str) -> None:
        """Send one text frame.

        Raises:
            ConnectionClosedError: If the peer reset the connection.
        """
        async with self._send_lock:
            try:
                await self._ws.send_str(message)
            except ConnectionResetError as exc:
                # Chain the cause so the traceback reads as a translation of
                # the reset, not as a failure inside the handler.
                raise ConnectionClosedError("Connection closed during send") from exc

    async def receive(self) -> str:
        """Return the payload of the next text frame.

        Raises:
            ConnectionClosedError: For close/error frames and for any other
                non-text message type.
        """
        msg = await self._ws.receive()
        if msg.type == web.WSMsgType.TEXT:
            return msg.data
        if msg.type in (
            web.WSMsgType.CLOSE,
            web.WSMsgType.CLOSING,
            web.WSMsgType.CLOSED,
            web.WSMsgType.ERROR,
        ):
            raise ConnectionClosedError("Connection closed")
        raise ConnectionClosedError(f"Unexpected message type: {msg.type}")

    async def close(self) -> None:
        """Close the underlying WebSocket."""
        await self._ws.close()
ActionResponse.success(_result_to_dict(result)) + + @self.action(LangBotToBoxAction.CREATE_SESSION) + async def create_session(data: dict[str, Any]) -> ActionResponse: + try: + spec = BoxSpec.model_validate(data) + except pydantic.ValidationError as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + info = await self._runtime.create_session(spec) + return ActionResponse.success(info) + + @self.action(LangBotToBoxAction.GET_SESSION) + async def get_session(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success(self._runtime.get_session(data["session_id"])) + + @self.action(LangBotToBoxAction.GET_SESSIONS) + async def get_sessions(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success({"sessions": self._runtime.get_sessions()}) + + @self.action(LangBotToBoxAction.DELETE_SESSION) + async def delete_session(data: dict[str, Any]) -> ActionResponse: + await self._runtime.delete_session(data["session_id"]) + return ActionResponse.success({"deleted": data["session_id"]}) + + @self.action(LangBotToBoxAction.START_MANAGED_PROCESS) + async def start_managed_process(data: dict[str, Any]) -> ActionResponse: + session_id = data["session_id"] + try: + spec = BoxManagedProcessSpec.model_validate(data["spec"]) + except pydantic.ValidationError as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + info = await self._runtime.start_managed_process(session_id, spec) + return ActionResponse.success(info) + + @self.action(LangBotToBoxAction.GET_MANAGED_PROCESS) + async def get_managed_process(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success( + self._runtime.get_managed_process( + data["session_id"], + data.get("process_id", "default"), + ) + ) + + @self.action(LangBotToBoxAction.STOP_MANAGED_PROCESS) + async def stop_managed_process(data: dict[str, Any]) -> ActionResponse: + await self._runtime.stop_managed_process( + data["session_id"], data.get("process_id", "default") + ) + return 
ActionResponse.success( + {"stopped": data.get("process_id", "default")} + ) + + @self.action(LangBotToBoxAction.GET_BACKEND_INFO) + async def get_backend_info(data: dict[str, Any]) -> ActionResponse: + info = await self._runtime.get_backend_info() + return ActionResponse.success(info) + + @self.action(LangBotToBoxAction.LIST_SKILLS) + async def list_skills(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success( + {"skills": self._runtime.skill_store.list_skills()} + ) + + @self.action(LangBotToBoxAction.GET_SKILL) + async def get_skill(data: dict[str, Any]) -> ActionResponse: + skill = self._runtime.skill_store.get_skill(data["name"]) + return ActionResponse.success({"skill": skill}) + + @self.action(LangBotToBoxAction.CREATE_SKILL) + async def create_skill(data: dict[str, Any]) -> ActionResponse: + try: + skill = self._runtime.skill_store.create_skill(data["skill"]) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skill": skill}) + + @self.action(LangBotToBoxAction.UPDATE_SKILL) + async def update_skill(data: dict[str, Any]) -> ActionResponse: + try: + skill = self._runtime.skill_store.update_skill( + data["name"], data["skill"] + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skill": skill}) + + @self.action(LangBotToBoxAction.DELETE_SKILL) + async def delete_skill(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.delete_skill(data["name"]) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.SCAN_SKILL_DIRECTORY) + async def scan_skill_directory(data: dict[str, Any]) -> ActionResponse: + try: + skill = self._runtime.skill_store.scan_directory(data["path"]) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return 
ActionResponse.success(skill) + + @self.action(LangBotToBoxAction.LIST_SKILL_FILES) + async def list_skill_files(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.list_skill_files( + data["name"], + data.get("path", "."), + include_hidden=bool(data.get("include_hidden", False)), + max_entries=int(data.get("max_entries", 200)), + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.READ_SKILL_FILE) + async def read_skill_file(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.read_skill_file( + data["name"], data["path"] + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.WRITE_SKILL_FILE) + async def write_skill_file(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.write_skill_file( + data["name"], data["path"], data.get("content", "") + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.PREVIEW_SKILL_ZIP) + async def preview_skill_zip(data: dict[str, Any]) -> ActionResponse: + try: + file_bytes = await self.read_local_file(data["file_key"]) + await self.delete_local_file(data["file_key"]) + result = self._runtime.skill_store.preview_zip_upload( + file_bytes=file_bytes, + filename=data.get("filename", "skill.zip"), + source_subdir=data.get("source_subdir") or "", + target_suffix=data.get("target_suffix", "upload"), + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skills": result}) + + @self.action(LangBotToBoxAction.INSTALL_SKILL_ZIP) + async def install_skill_zip(data: dict[str, Any]) -> ActionResponse: + try: + file_bytes = await 
self.read_local_file(data["file_key"]) + await self.delete_local_file(data["file_key"]) + result = self._runtime.skill_store.install_zip_upload( + file_bytes=file_bytes, + filename=data.get("filename", "skill.zip"), + source_paths=data.get("source_paths") or [], + source_path=data.get("source_path") or "", + source_subdir=data.get("source_subdir") or "", + target_suffix=data.get("target_suffix", "upload"), + ) + except Exception as exc: + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skills": result}) + + @self.action(LangBotToBoxAction.INIT) + async def init(data: dict[str, Any]) -> ActionResponse: + self._runtime.init(data) + return ActionResponse.success({"initialized": True}) + + @self.action(LangBotToBoxAction.SHUTDOWN) + async def shutdown(data: dict[str, Any]) -> ActionResponse: + await self._runtime.shutdown() + return ActionResponse.success({}) + + +# ── Managed process WebSocket relay ────────────────────────────────── + + +def _error_response(exc: Exception) -> web.Response: + return web.json_response( + {"error": {"code": type(exc).__name__, "message": str(exc)}}, + status=400, + ) + + +async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse: + runtime: BoxRuntime = request.app["runtime"] + session_id = request.match_info["session_id"] + process_id = request.match_info.get("process_id", "default") + + runtime_session = runtime._sessions.get(session_id) + if runtime_session is None: + return _error_response( + BoxSessionNotFoundError(f"session {session_id} not found") + ) + + managed_process = runtime_session.managed_processes.get(process_id) + if managed_process is None: + return _error_response( + BoxManagedProcessNotFoundError( + f"session {session_id} has no managed process with process_id={process_id}" + ) + ) + if not managed_process.is_running: + return _error_response( + BoxManagedProcessConflictError( + f"managed process {process_id} in session {session_id} is not running" + ) 
+ ) + + ws = web.WebSocketResponse(protocols=("mcp",)) + await ws.prepare(request) + + async with managed_process.attach_lock: + process = managed_process.process + stdout = process.stdout + stdin = process.stdin + if stdout is None or stdin is None: + await ws.close(message=b"managed process stdio unavailable") + return ws + + async def _stdout_to_ws() -> None: + while True: + line = await stdout.readline() + if not line: + break + await ws.send_str(line.decode("utf-8", errors="replace").rstrip("\n")) + runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc) + + async def _ws_to_stdin() -> None: + async for msg in ws: + if msg.type == web.WSMsgType.TEXT: + stdin.write((msg.data + "\n").encode("utf-8")) + await stdin.drain() + runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc) + elif msg.type in ( + web.WSMsgType.CLOSE, + web.WSMsgType.CLOSING, + web.WSMsgType.CLOSED, + web.WSMsgType.ERROR, + ): + break + + stdout_task = asyncio.create_task(_stdout_to_ws()) + stdin_task = asyncio.create_task(_ws_to_stdin()) + try: + done, pending = await asyncio.wait( + [stdout_task, stdin_task], + return_when=asyncio.FIRST_COMPLETED, + ) + for task in pending: + task.cancel() + for task in done: + task.result() + finally: + await ws.close() + + return ws + + +# ── Action RPC WebSocket handler ───────────────────────────────────── + + +async def handle_rpc_ws(request: web.Request) -> web.StreamResponse: + """Handle action RPC over a single aiohttp WebSocket connection.""" + runtime: BoxRuntime = request.app["runtime"] + + ws = web.WebSocketResponse() + await ws.prepare(request) + + connection = AiohttpWSConnection(ws) + handler = BoxServerHandler(connection, runtime) + await handler.run() + + return ws + + +# ── App factory ────────────────────────────────────────────────────── + + +def create_app(runtime: BoxRuntime) -> web.Application: + """Create the aiohttp app with all WebSocket routes on a single port.""" + app = web.Application() + 
app["runtime"] = runtime + app.router.add_get("/rpc/ws", handle_rpc_ws) + app.router.add_get( + "/v1/sessions/{session_id}/managed-process/{process_id}/ws", + handle_managed_process_ws, + ) + # Backward-compatible route (defaults to process_id='default') + app.router.add_get( + "/v1/sessions/{session_id}/managed-process/ws", handle_managed_process_ws + ) + return app + + +def create_ws_relay_app(runtime: BoxRuntime) -> web.Application: + """Backward-compatible alias for older callers. + + The relay and action RPC endpoints now live in one aiohttp app. + """ + return create_app(runtime) + + +# ── Entry point ────────────────────────────────────────────────────── + + +async def _run_server(host: str, port: int, mode: str) -> None: + runtime = BoxRuntime(logger=logger) + await runtime.initialize() + + # Start aiohttp — serves managed-process relay and (in ws mode) + # also the action RPC endpoint, all on the same port. + runner: web.AppRunner | None = None + try: + ws_app = create_app(runtime) + runner = web.AppRunner(ws_app) + await runner.setup() + site = web.TCPSite(runner, host, port) + await site.start() + logger.info(f"Box server listening on {host}:{port}") + except OSError as exc: + logger.warning(f"Box server failed to bind {host}:{port}: {exc}") + logger.warning("Managed process WebSocket attach will be unavailable.") + + try: + if mode == "stdio": + from langbot_plugin.runtime.io.controllers.stdio.server import ( + StdioServerController, + ) + + async def new_connection_callback(connection: Connection) -> None: + handler = BoxServerHandler(connection, runtime) + await handler.run() + + ctrl = StdioServerController() + await ctrl.run(new_connection_callback) + else: + # In ws mode, action RPC is served via aiohttp on /rpc/ws. + # Keep the server alive until cancelled. 
+ logger.info(f"Box action RPC available at ws://{host}:{port}/rpc/ws") + stop_event = asyncio.Event() + await stop_event.wait() + finally: + await runtime.shutdown() + if runner is not None: + await runner.cleanup() + + +def main(args: argparse.Namespace) -> None: + """Run the Box runtime service. + + Invoked by the `box` CLI subcommand with the parsed argument namespace, + mirroring how `lbp rt` drives ``langbot_plugin.runtime.app.main``. The + argument schema is defined once, on the `box` subparser in + ``langbot_plugin.cli``. + """ + # Mode selection mirrors the plugin runtime (`lbp rt`): WebSocket by + # default, stdio when `-s`/`--stdio-control` is passed. + control_mode = "stdio" if args.stdio_control else "ws" + + configure_process_logging(stream=sys.stderr) + asyncio.run(_run_server(args.host, args.ws_control_port, control_mode)) diff --git a/src/langbot_plugin/box/skill_store.py b/src/langbot_plugin/box/skill_store.py new file mode 100644 index 0000000..ad8eb15 --- /dev/null +++ b/src/langbot_plugin/box/skill_store.py @@ -0,0 +1,647 @@ +from __future__ import annotations + +import datetime as dt +import io +import os +import posixpath +import shutil +import tempfile +import zipfile +from pathlib import Path +from typing import Optional + +import yaml + + +_FRONTMATTER_FIELDS = ( + 'name', + 'display_name', + 'description', +) + +_PUBLIC_SKILL_FIELDS = ( + 'name', + 'display_name', + 'description', + 'instructions', + 'package_root', + 'entry_file', + 'created_at', + 'updated_at', +) + + +def parse_frontmatter(content: str) -> tuple[dict, str]: + if not content.startswith('---'): + return {}, content + + lines = content.splitlines(keepends=True) + if not lines or lines[0].strip() != '---': + return {}, content + + for index in range(1, len(lines)): + if lines[index].strip() == '---': + metadata_text = ''.join(lines[1:index]) + instructions = ''.join(lines[index + 1 :]).lstrip('\n') + metadata = yaml.safe_load(metadata_text) or {} + if not 
isinstance(metadata, dict): + metadata = {} + return metadata, instructions + + return {}, content + + +def build_skill_md(metadata: dict, instructions: str) -> str: + frontmatter = {} + for key in _FRONTMATTER_FIELDS: + value = metadata.get(key) + if value is None: + continue + if isinstance(value, str) and not value.strip(): + continue + frontmatter[key] = value + + if not frontmatter: + return instructions + + frontmatter_text = yaml.dump(frontmatter, default_flow_style=False, allow_unicode=True, sort_keys=False).strip() + return f'---\n{frontmatter_text}\n---\n\n{instructions}' + + +class BoxSkillStore: + """Skill package storage owned by the Box runtime process.""" + + def __init__(self, config: dict | None = None): + self._config = config or {} + + def update_config(self, config: dict) -> None: + self._config = config or {} + + @property + def root(self) -> str: + local_config = self._config.get('local') or {} + host_root = str(local_config.get('host_root') or './data/box').strip() + skills_root = str(local_config.get('skills_root') or 'skills').strip() + + host_root_path = Path(host_root).expanduser() + if not host_root_path.is_absolute(): + host_root_path = Path.cwd() / host_root_path + host_root_path = host_root_path.resolve() + + skills_root_path = Path(skills_root).expanduser() + if not skills_root_path.is_absolute(): + skills_root_path = host_root_path / skills_root_path + return str(skills_root_path.resolve()) + + def list_skills(self) -> list[dict]: + os.makedirs(self.root, exist_ok=True) + skills: list[dict] = [] + for package_root, entry_file in self._discover_skill_directories(self.root, max_depth=6): + try: + skills.append(self._load_skill_package(package_root, entry_file)) + except Exception: + continue + skills.sort(key=lambda item: item.get('updated_at', ''), reverse=True) + return [self._serialize_skill(skill) for skill in skills] + + def get_skill(self, skill_name: str) -> Optional[dict]: + for skill in self.list_skills(): + if 
skill.get('name') == skill_name: + return skill + return None + + def create_skill(self, data: dict) -> dict: + name = self._validate_skill_name(data.get('name', '')) + if self.get_skill(name): + raise ValueError(f'Skill with name "{name}" already exists') + + package_root = self._normalize_package_root(data.get('package_root', '')) + managed_root = self._managed_skill_path(name) + target_root = managed_root + imported_skill_data: dict | None = None + + if package_root and self._managed_install_root_for_package(package_root): + if not os.path.isdir(package_root): + raise ValueError(f'Directory does not exist: {package_root}') + target_root = package_root + imported_skill_data = self._read_skill_package(target_root) + elif package_root and package_root != managed_root: + if not os.path.isdir(package_root): + raise ValueError(f'Directory does not exist: {package_root}') + if os.path.exists(managed_root): + raise ValueError(f'Skill directory already exists: {managed_root}') + os.makedirs(os.path.dirname(managed_root), exist_ok=True) + shutil.copytree(package_root, managed_root) + imported_skill_data = self._read_skill_package(managed_root) + else: + os.makedirs(managed_root, exist_ok=True) + + metadata = { + 'name': name, + 'display_name': self._resolve_create_field(data, 'display_name', imported_skill_data, default=''), + 'description': self._resolve_create_field(data, 'description', imported_skill_data, default=''), + } + instructions = self._resolve_create_field(data, 'instructions', imported_skill_data, default='') + self._write_skill_md(target_root, metadata, instructions) + + created = self.get_skill(name) + if not created: + raise ValueError(f'Failed to create skill "{name}"') + return created + + def update_skill(self, skill_name: str, data: dict) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + + requested_name = str(data.get('name', skill['name']) or skill['name']).strip() + if 
requested_name != skill['name']: + raise ValueError('Renaming skills is not supported') + + requested_package_root = str(data.get('package_root', '') or '').strip() + existing_package_root = self._normalize_package_root(skill['package_root']) + if requested_package_root and self._normalize_package_root(requested_package_root) != existing_package_root: + raise ValueError('Updating package_root is not supported; recreate the skill to import a different package') + + metadata = { + 'name': skill['name'], + 'display_name': data.get('display_name', skill.get('display_name', '')), + 'description': data.get('description', skill.get('description', '')), + } + instructions = str(data.get('instructions', skill.get('instructions', '')) or '') + self._write_skill_md(skill['package_root'], metadata, instructions) + + updated = self.get_skill(skill_name) + if not updated: + raise ValueError(f'Skill "{skill_name}" not found after update') + return updated + + def delete_skill(self, skill_name: str) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + + package_root = self._normalize_package_root(skill['package_root']) + managed_install_root = self._managed_install_root_for_package(package_root) + if not managed_install_root: + raise ValueError('Only managed skills under the Box skills root can be deleted') + + shutil.rmtree(managed_install_root, ignore_errors=True) + return {'deleted': skill_name} + + def scan_directory(self, path: str) -> dict: + if not os.path.isdir(path): + raise ValueError(f'Directory does not exist: {path}') + + discovered = self._discover_skill_directories(path, max_depth=2) + if not discovered: + raise ValueError(f'No SKILL.md found in {path} or its subdirectories (max depth: 2)') + if len(discovered) > 1: + candidates = ', '.join(found_path for found_path, _entry in discovered) + raise ValueError( + f'Multiple skill directories found in {path}. 
Please choose a more specific path: {candidates}' + ) + + package_root, entry_file = discovered[0] + return self._load_skill_package(package_root, entry_file) + + def list_skill_files( + self, + skill_name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + skill = self._require_skill(skill_name) + target_dir, relative_path = self._resolve_skill_path(skill, path, expect_directory=True) + entries: list[dict] = [] + with os.scandir(target_dir) as iterator: + for entry in sorted(iterator, key=lambda item: item.name): + if not include_hidden and entry.name.startswith('.'): + continue + entry_rel_path = entry.name if relative_path in ('', '.') else os.path.join(relative_path, entry.name) + is_dir = entry.is_dir() + entries.append( + { + 'path': entry_rel_path.replace(os.sep, '/'), + 'name': entry.name, + 'is_dir': is_dir, + 'size': None if is_dir else entry.stat().st_size, + } + ) + if len(entries) >= max_entries: + break + + return { + 'skill': {'name': skill['name']}, + 'base_path': '.' 
if relative_path in ('', '.') else relative_path.replace(os.sep, '/'), + 'entries': entries, + 'truncated': len(entries) >= max_entries, + } + + def read_skill_file(self, skill_name: str, path: str) -> dict: + skill = self._require_skill(skill_name) + target_path, relative_path = self._resolve_skill_path(skill, path, expect_directory=False) + if not os.path.isfile(target_path): + raise ValueError(f'Skill file not found: {relative_path}') + + try: + with open(target_path, 'r', encoding='utf-8') as f: + content = f.read() + except UnicodeDecodeError as exc: + raise ValueError(f'Skill file is not valid UTF-8 text: {relative_path}') from exc + + return { + 'skill': {'name': skill['name']}, + 'path': relative_path.replace(os.sep, '/'), + 'content': content, + } + + def write_skill_file(self, skill_name: str, path: str, content: str) -> dict: + skill = self._require_skill(skill_name) + target_path, relative_path = self._resolve_skill_path(skill, path, expect_directory=False) + os.makedirs(os.path.dirname(target_path), exist_ok=True) + with open(target_path, 'w', encoding='utf-8') as f: + f.write(content) + + return { + 'skill': {'name': skill['name']}, + 'path': relative_path.replace(os.sep, '/'), + 'bytes_written': len(content.encode('utf-8')), + } + + def preview_zip_upload( + self, + *, + file_bytes: bytes, + filename: str, + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: + if not file_bytes: + raise ValueError('Uploaded file is empty') + + tmp_dir = tempfile.mkdtemp(prefix='langbot_box_skill_preview_') + try: + skill_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir) + skill_root = self._resolve_source_subdir_root(skill_root, source_subdir) + return self._preview_skill_candidates( + skill_root, + base_target_name=self._uploaded_skill_target_stem(filename), + suffix=target_suffix, + ) + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def install_zip_upload( + self, + *, + file_bytes: bytes, + filename: str, + 
source_paths: list[str] | None = None, + source_path: str = '', + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: + if not file_bytes: + raise ValueError('Uploaded file is empty') + + tmp_dir = tempfile.mkdtemp(prefix='langbot_box_skill_upload_') + try: + skill_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir) + skill_root = self._resolve_source_subdir_root(skill_root, source_subdir) + previews = self._preview_skill_candidates( + skill_root, + base_target_name=self._uploaded_skill_target_stem(filename), + suffix=target_suffix, + ) + selected_previews = self._select_preview_candidates( + previews, + {'source_paths': source_paths or [], 'source_path': source_path}, + ) + scanned = self._install_preview_candidates(skill_root, selected_previews) + return [self.get_skill(skill['name']) or self._serialize_skill(skill) for skill in scanned] + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def _require_skill(self, skill_name: str) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + return skill + + @staticmethod + def _serialize_skill(skill: dict) -> dict: + return {field: skill.get(field) for field in _PUBLIC_SKILL_FIELDS if field in skill} + + def _load_skill_package(self, package_root: str, entry_file: str = 'SKILL.md') -> dict: + package_root = self._normalize_package_root(package_root) + entry_path = os.path.join(package_root, entry_file) + with open(entry_path, 'r', encoding='utf-8') as f: + content = f.read() + + metadata, instructions = parse_frontmatter(content) + dir_name = os.path.basename(os.path.normpath(package_root)) + stat = os.stat(entry_path) + return { + 'name': str(metadata.get('name') or dir_name).strip(), + 'display_name': str(metadata.get('display_name') or metadata.get('name') or dir_name).strip(), + 'description': str(metadata.get('description') or '').strip(), + 'instructions': instructions, + 'package_root': package_root, + 
'entry_file': entry_file, + 'created_at': dt.datetime.fromtimestamp(stat.st_ctime, tz=dt.timezone.utc).isoformat(), + 'updated_at': dt.datetime.fromtimestamp(stat.st_mtime, tz=dt.timezone.utc).isoformat(), + } + + def _read_skill_package(self, package_root: str) -> dict: + entry = self._find_skill_entry(package_root) + if entry is None: + raise ValueError(f'No SKILL.md found in {package_root}') + + skill = self._load_skill_package(entry[0], entry[1]) + return { + 'entry_file': skill.get('entry_file', 'SKILL.md'), + 'display_name': skill.get('display_name', ''), + 'description': skill.get('description', ''), + 'instructions': skill.get('instructions', ''), + } + + def _write_skill_md(self, package_root: str, metadata: dict, instructions: str) -> None: + package_root = self._normalize_package_root(package_root) + os.makedirs(package_root, exist_ok=True) + content = build_skill_md(metadata, instructions) + with open(os.path.join(package_root, 'SKILL.md'), 'w', encoding='utf-8') as f: + f.write(content) + + def _managed_skill_path(self, skill_name: str) -> str: + return self._normalize_package_root(os.path.join(self.root, skill_name)) + + def _managed_install_root_for_package(self, package_root: str) -> str: + managed_root = self._normalize_package_root(self.root) + package_root = self._normalize_package_root(package_root) + if not package_root or package_root == managed_root: + return '' + + prefix = f'{managed_root}{os.sep}' + if not package_root.startswith(prefix): + return '' + + relative = os.path.relpath(package_root, managed_root) + top_level = relative.split(os.sep, 1)[0] + if top_level in ('', '.', '..'): + return '' + return os.path.join(managed_root, top_level) + + def _build_preview_target_dir(self, base_target_name: str, source_path: str, suffix: str) -> str: + relative = str(source_path or '').strip().replace('\\', '/').strip('/') + leaf_name = relative.split('/')[-1] if relative else '' + target_name = base_target_name + if leaf_name and leaf_name != 
base_target_name: + target_name = f'{base_target_name}-{leaf_name}' + if suffix: + target_name = f'{target_name}-{suffix}' + return os.path.join(self.root, target_name) + + def _preview_skill_candidates(self, root_path: str, *, base_target_name: str, suffix: str) -> list[dict]: + discovered = self._discover_skill_directories(root_path, max_depth=2) + if not discovered: + raise ValueError(f'No SKILL.md found in {root_path} or its subdirectories (max depth: 2)') + + previews: list[dict] = [] + for package_root, entry_file in discovered: + skill = self._load_skill_package(package_root, entry_file) + relative_path = os.path.relpath(package_root, root_path) + if relative_path in ('', '.'): + relative_path = '' + skill['source_path'] = relative_path.replace(os.sep, '/') + skill['package_root'] = self._build_preview_target_dir(base_target_name, relative_path, suffix) + previews.append(skill) + + previews.sort(key=lambda item: item['source_path']) + return [self._serialize_skill_with_source(preview) for preview in previews] + + @staticmethod + def _serialize_skill_with_source(skill: dict) -> dict: + data = BoxSkillStore._serialize_skill(skill) + if 'source_path' in skill: + data['source_path'] = skill['source_path'] + return data + + def _select_preview_candidates(self, previews: list[dict], data: dict) -> list[dict]: + normalized_paths: list[str] = [] + raw_source_paths = data.get('source_paths', []) + if isinstance(raw_source_paths, list): + for source_path in raw_source_paths: + normalized = str(source_path or '').strip().replace('\\', '/').strip('/') + if normalized not in normalized_paths: + normalized_paths.append(normalized) + + legacy_source_path = str(data.get('source_path', '') or '').strip().replace('\\', '/').strip('/') + if legacy_source_path and legacy_source_path not in normalized_paths: + normalized_paths.append(legacy_source_path) + + if len(previews) == 1 and not normalized_paths: + return previews + + if not normalized_paths: + candidates = ', 
'.join(item['source_path'] or '.' for item in previews) + raise ValueError(f'Multiple skills found. Please choose one or more source_paths: {candidates}') + + selected: list[dict] = [] + available = {preview['source_path']: preview for preview in previews} + for normalized_path in normalized_paths: + preview = available.get(normalized_path) + if preview is None: + candidates = ', '.join(item['source_path'] or '.' for item in previews) + raise ValueError(f'Invalid source_path "{normalized_path}". Available: {candidates}') + selected.append(preview) + + return selected + + def _install_preview_candidates(self, root_path: str, selected_previews: list[dict]) -> list[dict]: + target_dirs: list[str] = [] + for preview in selected_previews: + target_dir = self._normalize_package_root(preview['package_root']) + if target_dir in target_dirs: + raise ValueError(f'Duplicate target directory selected: {target_dir}') + if os.path.exists(target_dir): + raise ValueError(f'Skill directory already exists: {target_dir}') + target_dirs.append(target_dir) + + installed_scans: list[dict] = [] + created_dirs: list[str] = [] + try: + for preview in selected_previews: + target_dir = self._normalize_package_root(preview['package_root']) + source_root = self._preview_source_root(root_path, preview['source_path']) + os.makedirs(os.path.dirname(target_dir), exist_ok=True) + shutil.copytree(source_root, target_dir) + created_dirs.append(target_dir) + installed_scans.append(self.scan_directory(target_dir)) + except Exception: + for target_dir in created_dirs: + shutil.rmtree(target_dir, ignore_errors=True) + raise + + return installed_scans + + def _extract_uploaded_skill_to_temp(self, file_bytes: bytes, tmp_dir: str) -> str: + extract_dir = os.path.join(tmp_dir, 'extracted') + try: + with zipfile.ZipFile(io.BytesIO(file_bytes), 'r') as zf: + self._safe_extract_zip(zf, extract_dir) + except zipfile.BadZipFile as exc: + raise ValueError('Uploaded file must be a valid .zip archive') from exc + + 
entries = os.listdir(extract_dir) + if len(entries) == 1 and os.path.isdir(os.path.join(extract_dir, entries[0])): + return os.path.join(extract_dir, entries[0]) + return extract_dir + + @staticmethod + def _uploaded_skill_target_stem(filename: str) -> str: + stem = os.path.splitext(os.path.basename(str(filename or '').strip()))[0] + safe_stem = ''.join(ch if ch.isalnum() or ch in ('-', '_') else '-' for ch in stem).strip('-_') + return safe_stem or 'uploaded-skill' + + @staticmethod + def _preview_source_root(root_path: str, source_path: str) -> str: + normalized = str(source_path or '').strip().replace('\\', '/').strip('/') + if not normalized: + return root_path + return os.path.join(root_path, normalized) + + @staticmethod + def _resolve_source_subdir_root(root_path: str, source_subdir: str) -> str: + normalized = str(source_subdir or '').strip().replace('\\', '/').strip('/') + if not normalized: + return root_path + + normalized_path = os.path.normpath(normalized) + if normalized_path.startswith('..') or normalized_path == '..' 
or os.path.isabs(normalized_path): + raise ValueError('source_subdir must stay within the uploaded archive') + + target_root = os.path.realpath(os.path.join(root_path, normalized_path)) + archive_root = os.path.realpath(root_path) + if target_root != archive_root and not target_root.startswith(f'{archive_root}{os.sep}'): + raise ValueError('source_subdir must stay within the uploaded archive') + if not os.path.isdir(target_root): + raise ValueError(f'source_subdir does not exist in the uploaded archive: {normalized}') + return target_root + + @staticmethod + def _safe_extract_zip(archive: zipfile.ZipFile, target_dir: str) -> None: + target_root = os.path.realpath(target_dir) + os.makedirs(target_root, exist_ok=True) + + for member in archive.infolist(): + member_name = member.filename + if not member_name or member_name.endswith('/'): + continue + + normalized = posixpath.normpath(member_name) + if normalized.startswith('../') or normalized == '..' or os.path.isabs(normalized): + raise ValueError(f'Archive contains an unsafe path: {member_name}') + + destination = os.path.realpath(os.path.join(target_root, normalized)) + if destination != target_root and not destination.startswith(f'{target_root}{os.sep}'): + raise ValueError(f'Archive contains an unsafe path: {member_name}') + + archive.extractall(target_root) + + def _resolve_skill_path(self, skill: dict, path: str, *, expect_directory: bool) -> tuple[str, str]: + package_root = self._normalize_package_root(skill.get('package_root', '')) + if not package_root: + raise ValueError(f'Skill "{skill.get("name", "")}" has no package_root') + + relative_path = str(path or '.').strip() or '.' 
+ if os.path.isabs(relative_path): + raise ValueError('path must be relative to the skill package root') + + normalized_relative = os.path.normpath(relative_path) + if normalized_relative.startswith('..') or normalized_relative == '..': + raise ValueError('path must stay within the skill package root') + + target_path = os.path.realpath(os.path.join(package_root, normalized_relative)) + if target_path != package_root and not target_path.startswith(f'{package_root}{os.sep}'): + raise ValueError('path must stay within the skill package root') + + if expect_directory: + if not os.path.isdir(target_path): + raise ValueError(f'Skill directory not found: {relative_path}') + else: + parent_dir = os.path.dirname(target_path) or package_root + if parent_dir != package_root and not parent_dir.startswith(f'{package_root}{os.sep}'): + raise ValueError('path must stay within the skill package root') + + return target_path, normalized_relative + + @staticmethod + def _find_skill_entry(path: str) -> Optional[tuple[str, str]]: + for candidate in ('SKILL.md', 'skill.md'): + if os.path.isfile(os.path.join(path, candidate)): + return path, candidate + return None + + def _discover_skill_directories(self, root_path: str, max_depth: int = 2) -> list[tuple[str, str]]: + discovered: list[tuple[str, str]] = [] + queue: list[tuple[str, int]] = [(root_path, 0)] + seen: set[str] = set() + + while queue: + current_path, depth = queue.pop(0) + normalized_path = os.path.abspath(current_path) + if normalized_path in seen: + continue + seen.add(normalized_path) + + found = self._find_skill_entry(normalized_path) + if found: + discovered.append(found) + continue + + if depth >= max_depth: + continue + + try: + entries = sorted(os.scandir(normalized_path), key=lambda entry: entry.name) + except OSError: + continue + + for entry in entries: + if entry.is_dir(): + queue.append((entry.path, depth + 1)) + + return discovered + + @staticmethod + def _validate_skill_name(name: str) -> str: + name = 
str(name or '').strip() + if not name: + raise ValueError('Skill name is required') + if not name.replace('-', '').replace('_', '').isalnum(): + raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores') + if len(name) > 64: + raise ValueError('Skill name cannot exceed 64 characters') + return name + + @staticmethod + def _normalize_package_root(package_root: str) -> str: + package_root = str(package_root).strip() + if not package_root: + return '' + return os.path.realpath(os.path.abspath(package_root)) + + @staticmethod + def _resolve_create_field(data: dict, field: str, imported_skill_data: dict | None, *, default: str) -> str: + raw_value = data.get(field) if field in data else None + if raw_value is None: + if imported_skill_data is not None: + return str(imported_skill_data.get(field, default) or default) + return default + + value = str(raw_value or '') + if imported_skill_data is not None and not value.strip(): + return str(imported_skill_data.get(field, default) or default) + return value diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index 7f913f2..dc48f27 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -33,6 +33,10 @@ - [--stdio-control -s]: Use stdio for control connection - [--ws-control-port]: The port for control connection - [--ws-debug-port]: The port for debug connection + box: Run the sandbox box runtime + - [--host]: Bind address, default is 0.0.0.0 + - [--stdio-control]: Use stdio for control connection + - [--ws-control-port]: The port for control connection, default is 5410 """ @@ -120,6 +124,24 @@ def main(): help="Skip checking and installing dependencies for all installed plugins", ) + # box command + box_parser = subparsers.add_parser("box", help="Run the sandbox box runtime") + box_parser.add_argument( + "--host", default="0.0.0.0", help="Bind address" + ) + box_parser.add_argument( + "-s", + "--stdio-control", + 
action="store_true", + help="Use stdio for control connection", + ) + box_parser.add_argument( + "--ws-control-port", + type=int, + default=5410, + help="The port for control connection", + ) + args = parser.parse_args() if not args.command: @@ -148,6 +170,10 @@ def main(): publish_process() case "rt": runtime_app.main(args) + case "box": + from langbot_plugin.box.server import main as box_main + + box_main(args) case _: cli_print("unknown_command", args.command) sys.exit(1) diff --git a/src/langbot_plugin/runtime/io/handler.py b/src/langbot_plugin/runtime/io/handler.py index 1436c94..eecc997 100644 --- a/src/langbot_plugin/runtime/io/handler.py +++ b/src/langbot_plugin/runtime/io/handler.py @@ -16,6 +16,7 @@ import os import hashlib import base64 +import uuid import aiofiles import aiofiles.os import logging @@ -70,11 +71,9 @@ def __init__( @self.action(CommonAction.FILE_CHUNK) async def file_chunk(data: dict[str, Any]) -> ActionResponse: file_key = data["file_key"] - file_length = data["file_length"] chunk_base64 = data["chunk_base64"] chunk_index = data["chunk_index"] chunk_amount = data["chunk_amount"] - chunk_size = data["chunk_size"] # append the chunk to the file async with aiofiles.open( os.path.join(FILE_STORAGE_DIR, file_key), "ab" @@ -268,8 +267,10 @@ def decorator( # ====== file transfer ====== async def send_file(self, file_bytes: bytes, file_extension: str) -> str: """Send a file to the peer, chunk by chunk, in base64.""" - hash_value = hashlib.sha256(file_bytes).hexdigest() - file_key = f"{hash_value}.{file_extension}" + hash_value = hashlib.sha256(file_bytes).hexdigest()[:16] + extension = file_extension.strip(".") + suffix = f".{extension}" if extension else "" + file_key = f"{hash_value}-{uuid.uuid4().hex}{suffix}" file_length = len(file_bytes) chunk_amount = max( 1, (file_length + FILE_CHUNK_LENGTH - 1) // FILE_CHUNK_LENGTH @@ -308,4 +309,7 @@ async def read_local_file(self, file_key: str) -> bytes: return await f.read() async def 
delete_local_file(self, file_key: str) -> None: - await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key)) + try: + await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key)) + except FileNotFoundError: + return diff --git a/src/langbot_plugin/version.py b/src/langbot_plugin/version.py index d7b30e1..3bf78d8 100644 --- a/src/langbot_plugin/version.py +++ b/src/langbot_plugin/version.py @@ -1 +1 @@ -__version__ = "0.3.6" +__version__ = "0.4.0-beta.1" diff --git a/tests/box/__init__.py b/tests/box/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/box/test_backend_selection.py b/tests/box/test_backend_selection.py new file mode 100644 index 0000000..2439394 --- /dev/null +++ b/tests/box/test_backend_selection.py @@ -0,0 +1,275 @@ +"""Unit tests for BoxRuntime backend selection mechanism.""" + +from __future__ import annotations + +import logging +import datetime as dt +from unittest import mock + +import pytest + +from langbot_plugin.box.backend import BaseSandboxBackend +from langbot_plugin.box.models import BoxSessionInfo, BoxSpec +from langbot_plugin.box.runtime import BoxRuntime + + +@pytest.fixture +def logger(): + return logging.getLogger('test.runtime') + + +class MockBackend(BaseSandboxBackend): + """Mock backend for testing.""" + + def __init__(self, logger: logging.Logger, name: str, available: bool = True): + super().__init__(logger) + self.name = name + self._available = available + self._alive = True + self.started_sessions = 0 + self.stopped_sessions = 0 + + async def is_available(self) -> bool: + return self._available + + async def start_session(self, spec): + self.started_sessions += 1 + now = dt.datetime.now(dt.timezone.utc) + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=f'{self.name}-{self.started_sessions}', + image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, + 
persistent=spec.persistent, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, + created_at=now, + last_used_at=now, + ) + + async def exec(self, session, spec): + pass + + async def stop_session(self, session): + self.stopped_sessions += 1 + + async def is_session_alive(self, session) -> bool: + return self._alive + + +# ── E2B backend creation ──────────────────────────────────────────────── + +def test_e2b_backend_created_if_package_installed(logger): + """E2B backend is created when package is installed.""" + with mock.patch('os.getenv', return_value=''): + runtime = BoxRuntime(logger) + # E2B backend exists (package installed) + e2b_backend = runtime.backends[2] + assert e2b_backend is not None + assert e2b_backend.name == 'e2b' + + +def test_e2b_backend_none_if_package_not_installed(logger): + """E2B backend is None when package is not installed.""" + with ( + mock.patch('os.getenv', return_value=''), + mock.patch.object(BoxRuntime, '_create_e2b_backend', return_value=None), + ): + runtime = BoxRuntime(logger) + # Third backend is None (package not installed) + assert runtime.backends[2] is None + # Filtered list for selection + active_backends = [b for b in runtime.backends if b is not None] + assert len(active_backends) == 2 + + +def test_e2b_import_failure_returns_none(logger): + """Import failure for e2b package returns None, not fatal.""" + with mock.patch('os.getenv', return_value=''): + # _create_e2b_backend handles ImportError internally + runtime = BoxRuntime(logger) + # Should have Docker, nsjail, and E2B (if package installed) or None + active_backends = [b for b in runtime.backends if b is not None] + assert len(active_backends) >= 2 + + +# ── box.backend configuration ────────────────────────────────────────── + +@pytest.mark.anyio +async def test_box_backend_config_forces_specific_backend(logger): + """box.backend config forces 
selection of named backend.""" + backend_e2b = MockBackend(logger, 'e2b', available=True) + backend_docker = MockBackend(logger, 'docker', available=True) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail]) + runtime.init({'backend': 'docker'}) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected.name == 'docker' + assert selected is backend_docker + + +@pytest.mark.anyio +async def test_box_backend_config_unavailable_returns_none(logger): + """When box.backend specifies unavailable backend, returns None.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + runtime.init({'backend': 'e2b'}) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected is None + + +@pytest.mark.anyio +async def test_box_backend_config_not_found_returns_none(logger): + """When box.backend specifies unknown backend name, returns None.""" + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker]) + runtime.init({'backend': 'unknown'}) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected is None + + +@pytest.mark.anyio +async def test_box_backend_config_no_fallback(logger): + """When box.backend is set but backend unavailable, does NOT fallback.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + runtime.init({'backend': 'e2b'}) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + # Should return 
None, not fallback to docker + assert selected is None + + +@pytest.mark.anyio +async def test_box_backend_env_var_is_ignored(logger): + """BOX_BACKEND is not an independent override; use box.backend instead.""" + backend_docker = MockBackend(logger, 'docker', available=True) + backend_e2b = MockBackend(logger, 'e2b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b]) + runtime.init({'backend': 'docker'}) + + with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert selected is backend_docker + + +# ── Auto-detect backend selection ─────────────────────────────────────── + +@pytest.mark.anyio +async def test_auto_detect_first_available(logger): + """Without box.backend, selects first available backend.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail]) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected.name == 'docker' + + +@pytest.mark.anyio +async def test_auto_detect_none_when_all_unavailable(logger): + """Returns None when all backends are unavailable.""" + backend_docker = MockBackend(logger, 'docker', available=False) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_nsjail]) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected is None + + +@pytest.mark.anyio +async def test_init_config_reselects_backend_before_sessions(logger): + """INIT config from LangBot can change the selected backend.""" + backend_docker = MockBackend(logger, 'docker', available=True) + backend_e2b = MockBackend(logger, 
'e2b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b]) + + with mock.patch('os.getenv', return_value=None): + await runtime.initialize() + assert runtime._backend is backend_docker + + runtime.init({'backend': 'e2b'}) + assert runtime._backend is None + + selected = await runtime._get_backend() + + assert selected is backend_e2b + + +@pytest.mark.anyio +async def test_create_session_recreates_disappeared_backend_session(logger): + """A stale in-memory session is dropped if its backend session vanished.""" + backend = MockBackend(logger, 'docker', available=True) + runtime = BoxRuntime(logger, backends=[backend]) + spec = BoxSpec(session_id='mcp-shared', cmd='true', persistent=True, read_only_rootfs=False) + + with mock.patch('os.getenv', return_value=None): + first = await runtime.create_session(spec) + backend._alive = False + second = await runtime.create_session(spec) + + assert first['backend_session_id'] == 'docker-1' + assert second['backend_session_id'] == 'docker-2' + assert backend.started_sessions == 2 + assert backend.stopped_sessions == 1 + + +# ── Custom backends list ──────────────────────────────────────────────── + +def test_custom_backends_list_preserved(logger): + """Providing custom backends list overrides auto-detection.""" + custom_backend = MockBackend(logger, 'custom', available=True) + + runtime = BoxRuntime(logger, backends=[custom_backend]) + + assert len(runtime.backends) == 1 + assert runtime.backends[0].name == 'custom' + + +@pytest.mark.anyio +async def test_custom_backends_with_box_backend_config(logger): + """box.backend works with custom backends list.""" + backend_a = MockBackend(logger, 'a', available=True) + backend_b = MockBackend(logger, 'b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_a, backend_b]) + runtime.init({'backend': 'b'}) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected.name == 'b' diff 
--git a/tests/box/test_e2b_backend.py b/tests/box/test_e2b_backend.py new file mode 100644 index 0000000..0252c35 --- /dev/null +++ b/tests/box/test_e2b_backend.py @@ -0,0 +1,482 @@ +"""Unit tests for E2BSandboxBackend. + +These tests do NOT require e2b package to be installed – they mock the E2B SDK +to verify parameter mapping, session lifecycle, and availability detection. +""" + +from __future__ import annotations + +import json +import logging +from unittest import mock + +import pytest + +from langbot_plugin.box.e2b_backend import ( + E2BSandboxBackend, + _adapt_path_for_e2b, + _check_e2b_available, +) +from langbot_plugin.box.models import ( + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) + + +@pytest.fixture +def logger(): + return logging.getLogger('test.e2b') + + +@pytest.fixture +def backend(logger): + b = E2BSandboxBackend(logger=logger) + b.instance_id = 'test123' + return b + + +@pytest.fixture +def mock_e2b_module(): + """Mock the e2b module for tests.""" + mock_async_sandbox = mock.MagicMock() + mock_async_sandbox.sandbox_id = 'sandbox-test-123' + + # Mock AsyncSandbox.create + mock_async_sandbox.create = mock.AsyncMock(return_value=mock_async_sandbox) + + # Mock AsyncSandbox.connect + mock_async_sandbox.connect = mock.AsyncMock(return_value=mock_async_sandbox) + + # Mock AsyncSandbox.kill + mock_async_sandbox.kill = mock.AsyncMock(return_value=True) + + # Mock commands.run result + mock_command_result = mock.MagicMock() + mock_command_result.stdout = 'output' + mock_command_result.stderr = '' + mock_command_result.exit_code = 0 + + mock_commands = mock.MagicMock() + mock_commands.run = mock.AsyncMock(return_value=mock_command_result) + mock_async_sandbox.commands = mock_commands + + # Mock the module import + with ( + mock.patch('langbot_plugin.box.e2b_backend._e2b_available', None), + mock.patch('langbot_plugin.box.e2b_backend._AsyncSandbox', None), + 
mock.patch('langbot_plugin.box.e2b_backend._CommandResult', None), + ): + # Simulate successful import + import langbot_plugin.box.e2b_backend as e2b_backend + e2b_backend._e2b_available = True + e2b_backend._AsyncSandbox = mock_async_sandbox + yield mock_async_sandbox + + +# ── Path adaptation ──────────────────────────────────────────────────── + +def test_adapt_path_workspace(): + """_adapt_path_for_e2b maps /workspace to /home/user/workspace.""" + assert _adapt_path_for_e2b('/workspace') == '/home/user/workspace' + assert _adapt_path_for_e2b('/workspace/subdir') == '/home/user/workspace/subdir' + + +def test_adapt_path_other_paths_unchanged(): + """_adapt_path_for_e2b doesn't modify paths not starting with /workspace.""" + assert _adapt_path_for_e2b('/home/user') == '/home/user' + assert _adapt_path_for_e2b('/tmp') == '/tmp' + assert _adapt_path_for_e2b('/code') == '/code' + + +# ── is_available ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_is_available_no_package(backend): + """is_available returns False when e2b package is not installed.""" + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=False): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_no_api_key(backend): + """is_available returns False when E2B_API_KEY is not set.""" + backend._api_key = None + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_with_api_key(backend): + """is_available returns True when both package and API key are available.""" + backend._api_key = 'test-api-key' + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True): + assert await backend.is_available() is True + + +@pytest.mark.anyio +async def test_configure_from_langbot(backend, mock_e2b_module): + """configure() applies 
settings from LangBot config.yaml.""" + backend.configure({ + 'api_key': 'config-api-key', + 'api_url': 'http://127.0.0.1:3000', + 'template': 'python-3.11', + }) + await backend.initialize() + + # Environment variable takes precedence, so if not set, use config + assert backend._api_key == 'config-api-key' + assert backend._api_url == 'http://127.0.0.1:3000' + assert backend._template == 'python-3.11' + + +@pytest.mark.anyio +async def test_env_vars_override_config(backend, mock_e2b_module): + """Environment variables take precedence over config.yaml values.""" + with mock.patch.dict('os.environ', {'E2B_API_KEY': 'env-api-key', 'E2B_API_URL': 'http://env-url'}): + backend.configure({ + 'api_key': 'config-api-key', + 'api_url': 'http://config-url', + }) + await backend.initialize() + + # Environment variables should win + assert backend._api_key == 'env-api-key' + assert backend._api_url == 'http://env-url' + + +# ── start_session ───────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_basic(backend, mock_e2b_module): + """start_session creates sandbox with default parameters.""" + backend._api_key = 'test-api-key' + spec = BoxSpec(session_id='sess1', cmd='echo hi') + + info = await backend.start_session(spec) + + assert info.backend_name == 'e2b' + assert info.session_id == 'sess1' + assert info.backend_session_id == 'sandbox-test-123' + # Session metadata keeps LangBot's logical mount path so later specs + # with /workspace can reuse the same session. 
+ assert info.mount_path == '/workspace' + + # Verify AsyncSandbox.create was called with api_key + mock_e2b_module.create.assert_called_once() + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('api_key') == 'test-api-key' + + +@pytest.mark.anyio +async def test_start_session_with_template(backend, mock_e2b_module): + """start_session passes template parameter when image is specified.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess2', + cmd='python script.py', + image='python-3.11', + ) + + info = await backend.start_session(spec) + + assert info.image == 'python-3.11' + + # Verify template was passed + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('template') == 'python-3.11' + + +@pytest.mark.anyio +async def test_start_session_with_envs(backend, mock_e2b_module): + """start_session passes environment variables.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess3', + cmd='echo $FOO', + env={'FOO': 'bar', 'DEBUG': '1'}, + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('envs') == {'FOO': 'bar', 'DEBUG': '1'} + + +@pytest.mark.anyio +async def test_start_session_with_api_url(backend, mock_e2b_module): + """start_session passes domain for CubeSandbox self-deployment.""" + backend._api_key = 'dummy' + backend._api_url = 'http://127.0.0.1:3000' + spec = BoxSpec(session_id='sess4', cmd='ls') + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('domain') == 'http://127.0.0.1:3000' + + +@pytest.mark.anyio +async def test_start_session_custom_mount_path(backend, mock_e2b_module): + """start_session adapts custom mount_path.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess5', + cmd='ls', + mount_path='/workspace/myproject', + ) + + info = await backend.start_session(spec) + + # 
Session metadata keeps the logical mount path; command execution adapts + # it to E2B's internal writable path. + assert info.mount_path == '/workspace/myproject' + + +# ── CubeSandbox host-mount metadata ─────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_host_mount_rw(backend, mock_e2b_module): + """host_path with rw mode generates correct metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess-hp-rw', + cmd='ls', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/workspace', + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + metadata = call_kwargs.get('metadata', {}) + + assert 'host-mount' in metadata + host_mount = json.loads(metadata['host-mount']) + assert len(host_mount) == 1 + assert host_mount[0]['hostPath'] == '/data/project' + # mountPath should be adapted + assert host_mount[0]['mountPath'] == '/home/user/workspace' + assert host_mount[0]['readOnly'] is False + + +@pytest.mark.anyio +async def test_start_session_host_mount_ro(backend, mock_e2b_module): + """host_path with ro mode generates readOnly=True in metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess-hp-ro', + cmd='cat file.txt', + host_path='/data/source', + host_path_mode=BoxHostMountMode.READ_ONLY, + mount_path='/src', # Non-workspace path stays unchanged + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + metadata = call_kwargs.get('metadata', {}) + + host_mount = json.loads(metadata['host-mount']) + assert host_mount[0]['readOnly'] is True + # Non-workspace path stays unchanged + assert host_mount[0]['mountPath'] == '/src' + + +@pytest.mark.anyio +async def test_start_session_no_host_mount_when_none(backend, mock_e2b_module): + """host_path_mode=none skips host-mount metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + 
session_id='sess-hp-none', + cmd='ls', + host_path='/data', + host_path_mode=BoxHostMountMode.NONE, + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert 'host-mount' not in call_kwargs.get('metadata', {}) + + +@pytest.mark.anyio +async def test_start_session_no_host_mount_when_empty(backend, mock_e2b_module): + """Empty host_path skips host-mount metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec(session_id='sess-no-hp', cmd='ls') + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert 'metadata' not in call_kwargs or 'host-mount' not in call_kwargs.get('metadata', {}) + + +# ── exec ────────────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_exec_success(backend, mock_e2b_module): + """exec runs command and returns result.""" + backend._api_key = 'test-api-key' + + session = BoxSessionInfo( + session_id='exec-sess', + backend_name='e2b', + backend_session_id='sandbox-123', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='exec-sess', cmd='echo hello', workdir='/workspace', env={'FOO': 'bar'}) + + result = await backend.exec(session, spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert result.stdout == 'output' + + # Verify connect and run were called + mock_e2b_module.connect.assert_called_once() + mock_e2b_module.commands.run.assert_called_once() + + # Verify command includes path adaptation + run_kwargs = mock_e2b_module.commands.run.call_args.kwargs + assert '/home/user/workspace' in run_kwargs['cmd'] + + +@pytest.mark.anyio +async def test_exec_timeout(backend, mock_e2b_module): + """exec handles timeout correctly.""" + backend._api_key = 'test-api-key' + + # Mock timeout error + mock_e2b_module.commands.run = mock.AsyncMock( 
+ side_effect=Exception('Command timed out after 30 seconds') + ) + + session = BoxSessionInfo( + session_id='timeout-sess', + backend_name='e2b', + backend_session_id='sandbox-456', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='timeout-sess', cmd='sleep 100', timeout_sec=30) + + result = await backend.exec(session, spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + assert 'timed out' in result.stderr.lower() + + +@pytest.mark.anyio +async def test_exec_truncates_large_output(backend, mock_e2b_module): + """exec truncates output exceeding the limit.""" + backend._api_key = 'test-api-key' + + # Create large output (over 1MB) + large_output = 'x' * (2 * 1024 * 1024) # 2MB + mock_command_result = mock.MagicMock() + mock_command_result.stdout = large_output + mock_command_result.stderr = '' + mock_command_result.exit_code = 0 + + mock_commands = mock.MagicMock() + mock_commands.run = mock.AsyncMock(return_value=mock_command_result) + mock_e2b_module.commands = mock_commands + + session = BoxSessionInfo( + session_id='truncate-sess', + backend_name='e2b', + backend_session_id='sandbox-789', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='truncate-sess', cmd='cat large_file') + + result = await backend.exec(session, spec) + + assert 'clipped' in result.stdout + + +# ── stop_session ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_stop_session(backend, mock_e2b_module): + """stop_session kills the sandbox.""" + backend._api_key = 'test-api-key' + + session = BoxSessionInfo( + session_id='stop-sess', + backend_name='e2b', + backend_session_id='sandbox-to-kill', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + 
last_used_at='2024-01-01T00:00:00+00:00', + ) + + await backend.stop_session(session) + + # Verify AsyncSandbox.kill was called + mock_e2b_module.kill.assert_called_once() + + +@pytest.mark.anyio +async def test_stop_session_handles_error(backend, mock_e2b_module): + """stop_session logs error but doesn't raise on kill failure.""" + backend._api_key = 'test-api-key' + + mock_e2b_module.kill = mock.AsyncMock(side_effect=Exception('Sandbox not found')) + + session = BoxSessionInfo( + session_id='stop-fail', + backend_name='e2b', + backend_session_id='sandbox-missing', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + + # Should not raise + await backend.stop_session(session) + + +# ── _check_e2b_available ────────────────────────────────────────────── + +def test_check_e2b_available_caches_result(): + """_check_e2b_available caches the import check result.""" + # Reset the cache + import langbot_plugin.box.e2b_backend as e2b_backend + e2b_backend._e2b_available = None + + # First call + with mock.patch.dict('sys.modules', {'e2b': mock.MagicMock()}): + result1 = _check_e2b_available() + + # Second call should use cached result + result2 = _check_e2b_available() + + assert result1 == result2 + + +def test_check_e2b_available_returns_false_on_import_error(): + """_check_e2b_available returns False when import fails.""" + import langbot_plugin.box.e2b_backend as e2b_backend + e2b_backend._e2b_available = None + e2b_backend._AsyncSandbox = None + + with mock.patch('builtins.__import__', side_effect=ImportError('No e2b')): + result = _check_e2b_available() + + assert result is False diff --git a/tests/box/test_nsjail_backend.py b/tests/box/test_nsjail_backend.py new file mode 100644 index 0000000..2a45b19 --- /dev/null +++ b/tests/box/test_nsjail_backend.py @@ -0,0 +1,452 @@ +"""Unit tests for NsjailBackend. 
+ +These tests do NOT require nsjail to be installed – they mock subprocess +calls and filesystem checks to verify argument construction, session +directory management, and cgroup detection logic. +""" + +from __future__ import annotations + +import asyncio +import logging +import pathlib +from unittest import mock + +import pytest + +from langbot_plugin.box.nsjail_backend import ( + NsjailBackend, + _READONLY_ETC_ENTRIES, + _READONLY_SYSTEM_MOUNTS, +) +from langbot_plugin.box.models import ( + BoxExecutionStatus, + BoxHostMountMode, + BoxMountSpec, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) + + +@pytest.fixture +def logger(): + return logging.getLogger('test.nsjail') + + +@pytest.fixture +def tmp_base(tmp_path: pathlib.Path): + return tmp_path / 'nsjail-base' + + +@pytest.fixture +def backend(logger, tmp_base): + b = NsjailBackend(logger=logger, base_dir=str(tmp_base)) + b.instance_id = 'test123' + return b + + +# ── is_available ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_is_available_no_binary(backend): + with mock.patch('shutil.which', return_value=None): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_binary_exists(backend, tmp_base): + with ( + mock.patch('shutil.which', return_value='/usr/bin/nsjail'), + mock.patch('asyncio.create_subprocess_exec') as mock_exec, + ): + mock_proc = mock.AsyncMock() + mock_proc.returncode = 0 + mock_proc.wait = mock.AsyncMock(return_value=0) + mock_exec.return_value = mock_proc + + result = await backend.is_available() + assert result is True + assert tmp_base.exists() + + +# ── start_session ───────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_creates_directories(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='sess1', cmd='echo hi') + + info = await backend.start_session(spec) + + session_dir = 
pathlib.Path(info.backend_session_id) + assert session_dir.exists() + assert (session_dir / 'root').is_dir() + assert (session_dir / 'workspace').is_dir() + assert (session_dir / 'tmp').is_dir() + assert (session_dir / 'home').is_dir() + assert (session_dir / 'meta.json').exists() + + assert info.backend_name == 'nsjail' + assert info.session_id == 'sess1' + assert info.image == spec.image + assert info.read_only_rootfs is True + + +@pytest.mark.anyio +async def test_start_session_with_host_path(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec( + session_id='sess2', + cmd='ls', + host_path='/some/path', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + ) + + info = await backend.start_session(spec) + assert info.host_path == '/some/path' + assert info.host_path_mode == BoxHostMountMode.READ_WRITE + assert info.mount_path == '/project' + + +# ── stop_session ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_stop_session_removes_directory(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='sess-rm', cmd='echo') + + info = await backend.start_session(spec) + session_dir = pathlib.Path(info.backend_session_id) + assert session_dir.exists() + + await backend.stop_session(info) + assert not session_dir.exists() + + +# ── nsjail argument construction ────────────────────────────────────── + +def test_build_nsjail_args_basic(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_session' + for d in ('root', 'workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + spec = BoxSpec(session_id='s1', cmd='echo hello', env={'FOO': 'bar'}) + session = BoxSessionInfo( + session_id='s1', + backend_name='nsjail', + backend_session_id=str(session_dir), + image=spec.image, + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', 
+ ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + assert args[0] == 'nsjail' + assert '--mode' in args + assert args[args.index('--mode') + 1] == 'o' + assert '--chroot' in args + assert args[args.index('--chroot') + 1] == str(session_dir / 'root') + assert '--clone_newnet' not in args + assert '--clone_newuser' not in args + assert '--clone_newns' not in args + assert '--disable_clone_newnet' not in args + assert '--really_quiet' in args + + # Writable mounts should reference session directories. + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] + workspace_mount = f'{session_dir}/workspace:/workspace' + assert workspace_mount in rw_binds + + # Custom env should be present. + env_values = [args[i + 1] for i, a in enumerate(args) if a == '--env'] + assert 'FOO=bar' in env_values + + # Command is the last part after '--'. + separator_idx = args.index('--') + assert args[separator_idx + 1] == '/bin/sh' + + # Mount target directories are created under the per-session chroot root. 
+ assert (session_dir / 'root' / 'workspace').is_dir() + assert (session_dir / 'root' / 'tmp').is_dir() + assert (session_dir / 'root' / 'home').is_dir() + + +def test_build_nsjail_args_network_on(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_session_net' + for d in ('root', 'workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s2', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.ON, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='s2', cmd='curl http://example.com', network=BoxNetworkMode.ON) + + args = backend._build_nsjail_args(session, spec, session_dir) + + assert '--disable_clone_newnet' in args + assert '--clone_newnet' not in args + + +def test_build_nsjail_args_host_path_ro(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_hp' + for d in ('root', 'workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s3', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_ONLY, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s3', + cmd='ls', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_ONLY, + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + ro_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount_ro'] + assert '/data/project:/workspace' in ro_binds + + +def test_build_nsjail_args_uses_custom_mount_path(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_custom_mount' + for d in ('root', 'workspace', 'tmp', 'home'): + 
(session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s4', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s4', + cmd='pwd', + workdir='/project/src', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] + assert '/data/project:/project' in rw_binds + assert args[args.index('--cwd') + 1] == '/project/src' + assert (session_dir / 'root' / 'project').is_dir() + + +def test_build_nsjail_args_extra_mounts_prepare_targets(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_extra_mount' + for d in ('root', 'workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s5', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s5', + cmd='ls /workspace/.skills/demo', + extra_mounts=[ + BoxMountSpec( + host_path='/data/skills/demo', + mount_path='/workspace/.skills/demo', + mode=BoxHostMountMode.READ_WRITE, + ) + ], + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] + assert '/data/skills/demo:/workspace/.skills/demo' in rw_binds + assert (session_dir / 'root' / 'workspace' / '.skills' / 'demo').is_dir() + + +def test_build_resource_limits_cgroup(backend): + backend._cgroup_v2_available = True + spec = 
BoxSpec(session_id='s', cmd='x', cpus=2.0, memory_mb=1024, pids_limit=256) + + args = backend._build_resource_limits(spec) + + assert '--cgroup_mem_max' in args + mem_idx = args.index('--cgroup_mem_max') + assert args[mem_idx + 1] == str(1024 * 1024 * 1024) + + pids_idx = args.index('--cgroup_pids_max') + assert args[pids_idx + 1] == '256' + + cpu_idx = args.index('--cgroup_cpu_ms_per_sec') + assert args[cpu_idx + 1] == '2000' + + +def test_build_resource_limits_rlimit_fallback(backend): + backend._cgroup_v2_available = False + spec = BoxSpec(session_id='s', cmd='x', memory_mb=512, pids_limit=128) + + args = backend._build_resource_limits(spec) + + assert '--rlimit_as' in args + as_idx = args.index('--rlimit_as') + assert args[as_idx + 1] == '512' + + nproc_idx = args.index('--rlimit_nproc') + assert args[nproc_idx + 1] == '128' + + # cgroup flags should NOT be present. + assert '--cgroup_mem_max' not in args + + +# ── exec ────────────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_exec_success(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='exec1', cmd='echo hello') + info = await backend.start_session(spec) + + with mock.patch.object(backend, '_run_nsjail') as mock_run: + from langbot_plugin.box.backend import _CommandResult + mock_run.return_value = _CommandResult( + return_code=0, stdout='hello\n', stderr='', timed_out=False + ) + + result = await backend.exec(info, spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert result.stdout == 'hello\n' + assert result.backend_name == 'nsjail' + + +@pytest.mark.anyio +async def test_exec_timeout(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='exec2', cmd='sleep 100', timeout_sec=1) + info = await backend.start_session(spec) + + with mock.patch.object(backend, '_run_nsjail') as mock_run: + from langbot_plugin.box.backend import 
_CommandResult + mock_run.return_value = _CommandResult( + return_code=-1, stdout='', stderr='', timed_out=True + ) + + result = await backend.exec(info, spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + + +# ── cgroup detection ────────────────────────────────────────────────── + +def test_detect_cgroup_v2_no_mount(): + with mock.patch.object(pathlib.Path, 'exists', return_value=False): + assert NsjailBackend._detect_cgroup_v2() is False + + +def test_detect_cgroup_v2_root_user(): + orig_exists = pathlib.Path.exists + + def always_exists(self): + return True + + with ( + mock.patch('os.getuid', return_value=0), + mock.patch.object(pathlib.Path, 'exists', always_exists), + ): + assert NsjailBackend._detect_cgroup_v2() is True + + +def test_detect_cgroup_v2_user_slice_must_be_writable(): + orig_exists = pathlib.Path.exists + + def fake_exists(self): + path = str(self) + return path == '/sys/fs/cgroup' or path.endswith('cgroup.controllers') or 'user.slice' in path + + with ( + mock.patch('os.getuid', return_value=1000), + mock.patch.object(pathlib.Path, 'exists', fake_exists), + mock.patch('os.access', return_value=False), + ): + assert NsjailBackend._detect_cgroup_v2() is False + + with ( + mock.patch('os.getuid', return_value=1000), + mock.patch.object(pathlib.Path, 'exists', fake_exists), + mock.patch('os.access', return_value=True), + ): + assert NsjailBackend._detect_cgroup_v2() is True + + +# ── cleanup_orphaned_containers ─────────────────────────────────────── + +@pytest.mark.anyio +async def test_cleanup_orphaned_removes_old_sessions(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + + # Create a dir from a different instance. + old_dir = tmp_base / 'oldinst_sess1_abc' + old_dir.mkdir() + (old_dir / 'workspace').mkdir() + + # Create a dir from current instance. 
+ current_dir = tmp_base / 'test123_sess2_def' + current_dir.mkdir() + (current_dir / 'workspace').mkdir() + + with mock.patch.object(backend, '_kill_session_processes', new_callable=mock.AsyncMock): + await backend.cleanup_orphaned_containers('test123') + + assert not old_dir.exists() + assert current_dir.exists() + + +# ── output clipping ────────────────────────────────────────────────── + +def test_clip_captured_bytes_within_limit(): + data = b'hello world' + result = NsjailBackend._clip_captured_bytes(data, len(data)) + assert result == 'hello world' + + +def test_clip_captured_bytes_exceeds_limit(): + data = b'hello' + result = NsjailBackend._clip_captured_bytes(data, 2_000_000, limit=1_000_000) + assert 'clipped' in result + assert '1000000' in result diff --git a/tests/box/test_skill_store.py b/tests/box/test_skill_store.py new file mode 100644 index 0000000..dcba96f --- /dev/null +++ b/tests/box/test_skill_store.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import io +import zipfile + +from langbot_plugin.box.skill_store import BoxSkillStore + + +def _skill_zip(name: str = 'demo') -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w') as zf: + zf.writestr( + f'{name}/SKILL.md', + '---\n' + f'name: {name}\n' + f'display_name: {name.title()}\n' + 'description: Demo skill\n' + '---\n\n' + 'Use this skill for tests.\n', + ) + zf.writestr(f'{name}/notes.txt', 'hello') + return buffer.getvalue() + + +def _nested_skill_zip() -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w') as zf: + zf.writestr( + 'repo/packages/alpha/SKILL.md', + '---\nname: alpha\ndisplay_name: Alpha\n---\n\nAlpha instructions.\n', + ) + zf.writestr( + 'repo/packages/beta/SKILL.md', + '---\nname: beta\ndisplay_name: Beta\n---\n\nBeta instructions.\n', + ) + return buffer.getvalue() + + +def test_skill_store_installs_zip_under_configured_relative_skills_root(tmp_path): + store = BoxSkillStore({ + 'local': { + 'host_root': str(tmp_path), + 
'skills_root': 'custom-skills', + } + }) + + preview = store.preview_zip_upload(file_bytes=_skill_zip(), filename='demo.zip') + assert preview[0]['package_root'] == str(tmp_path / 'custom-skills' / 'demo-upload') + + installed = store.install_zip_upload(file_bytes=_skill_zip(), filename='demo.zip') + assert installed[0]['name'] == 'demo' + assert installed[0]['package_root'] == str(tmp_path / 'custom-skills' / 'demo-upload') + + files = store.list_skill_files('demo') + assert {entry['name'] for entry in files['entries']} == {'SKILL.md', 'notes.txt'} + + content = store.read_skill_file('demo', 'notes.txt') + assert content['content'] == 'hello' + + store.write_skill_file('demo', 'notes.txt', 'updated') + assert store.read_skill_file('demo', 'notes.txt')['content'] == 'updated' + + +def test_skill_store_supports_source_subdir_before_selecting_candidates(tmp_path): + store = BoxSkillStore({ + 'local': { + 'host_root': str(tmp_path), + 'skills_root': 'skills', + } + }) + + preview = store.preview_zip_upload( + file_bytes=_nested_skill_zip(), + filename='repo.zip', + source_subdir='packages', + ) + + assert [skill['source_path'] for skill in preview] == ['alpha', 'beta'] + + installed = store.install_zip_upload( + file_bytes=_nested_skill_zip(), + filename='repo.zip', + source_subdir='packages', + source_paths=['beta'], + ) + + assert [skill['name'] for skill in installed] == ['beta'] + assert installed[0]['package_root'] == str(tmp_path / 'skills' / 'repo-beta-upload')