From 5f97b4d49d1f6be95ae3abab72db8c590e60e5a3 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Sun, 22 Mar 2026 07:25:20 +0000 Subject: [PATCH 01/34] feat(box): add box runtime package and `lbp box` CLI command Move sandbox box runtime code from LangBot core into the plugin SDK as `langbot_plugin.box` package. Add `lbp box` CLI command to start the box runtime standalone, similar to `lbp rt` for the plugin runtime. New package includes: actions, backend (Docker/Podman), client, errors, models, runtime, security, server, and `__main__` entry point --- src/langbot_plugin/box/__init__.py | 1 + src/langbot_plugin/box/__main__.py | 5 + src/langbot_plugin/box/actions.py | 21 ++ src/langbot_plugin/box/backend.py | 388 +++++++++++++++++++++++++++++ src/langbot_plugin/box/client.py | 177 +++++++++++++ src/langbot_plugin/box/errors.py | 33 +++ src/langbot_plugin/box/models.py | 267 ++++++++++++++++++++ src/langbot_plugin/box/runtime.py | 386 ++++++++++++++++++++++++++++ src/langbot_plugin/box/security.py | 35 +++ src/langbot_plugin/box/server.py | 267 ++++++++++++++++++++ src/langbot_plugin/cli/__init__.py | 20 ++ 11 files changed, 1600 insertions(+) create mode 100644 src/langbot_plugin/box/__init__.py create mode 100644 src/langbot_plugin/box/__main__.py create mode 100644 src/langbot_plugin/box/actions.py create mode 100644 src/langbot_plugin/box/backend.py create mode 100644 src/langbot_plugin/box/client.py create mode 100644 src/langbot_plugin/box/errors.py create mode 100644 src/langbot_plugin/box/models.py create mode 100644 src/langbot_plugin/box/runtime.py create mode 100644 src/langbot_plugin/box/security.py create mode 100644 src/langbot_plugin/box/server.py diff --git a/src/langbot_plugin/box/__init__.py b/src/langbot_plugin/box/__init__.py new file mode 100644 index 0000000..c1ea6e1 --- /dev/null +++ b/src/langbot_plugin/box/__init__.py @@ -0,0 +1 @@ +"""LangBot Box runtime package.""" diff --git a/src/langbot_plugin/box/__main__.py 
b/src/langbot_plugin/box/__main__.py new file mode 100644 index 0000000..c6144f0 --- /dev/null +++ b/src/langbot_plugin/box/__main__.py @@ -0,0 +1,5 @@ +"""Allow running the Box server via ``python -m langbot_plugin.box``.""" + +from .server import main + +main() diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py new file mode 100644 index 0000000..954c606 --- /dev/null +++ b/src/langbot_plugin/box/actions.py @@ -0,0 +1,21 @@ +"""Box-specific action types for the action RPC protocol.""" + +from __future__ import annotations + +from langbot_plugin.entities.io.actions.enums import ActionType + + +class LangBotToBoxAction(ActionType): + """Actions sent from LangBot to the Box runtime.""" + + HEALTH = 'box_health' + STATUS = 'box_status' + EXEC = 'box_exec' + CREATE_SESSION = 'box_create_session' + GET_SESSION = 'box_get_session' + GET_SESSIONS = 'box_get_sessions' + DELETE_SESSION = 'box_delete_session' + START_MANAGED_PROCESS = 'box_start_managed_process' + GET_MANAGED_PROCESS = 'box_get_managed_process' + GET_BACKEND_INFO = 'box_get_backend_info' + SHUTDOWN = 'box_shutdown' diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py new file mode 100644 index 0000000..e5bbe56 --- /dev/null +++ b/src/langbot_plugin/box/backend.py @@ -0,0 +1,388 @@ +from __future__ import annotations + +import abc +import asyncio +import dataclasses +import datetime as dt +import logging +import re +import shlex +import shutil +import uuid + +from .errors import BoxError +from .models import ( + DEFAULT_BOX_MOUNT_PATH, + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from .security import validate_sandbox_security + +# Hard cap on raw subprocess output to prevent unbounded memory usage. +# Container timeout already bounds duration, but fast commands can still +# produce large output within the time limit. 
After this many bytes the +# remaining output is discarded before decoding. +_MAX_RAW_OUTPUT_BYTES = 1_048_576 # 1 MB per stream + + +@dataclasses.dataclass(slots=True) +class _CommandResult: + return_code: int + stdout: str + stderr: str + timed_out: bool = False + + +class BaseSandboxBackend(abc.ABC): + name: str + instance_id: str = '' + + def __init__(self, logger: logging.Logger): + self.logger = logger + + async def initialize(self): + return None + + @abc.abstractmethod + async def is_available(self) -> bool: + pass + + @abc.abstractmethod + async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: + pass + + @abc.abstractmethod + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + pass + + @abc.abstractmethod + async def stop_session(self, session: BoxSessionInfo): + pass + + async def start_managed_process(self, session: BoxSessionInfo, spec): + raise BoxError(f'{self.name} backend does not support managed processes') + + async def cleanup_orphaned_containers(self, current_instance_id: str = ''): + """Remove lingering containers from previous runs. 
No-op by default.""" + pass + + +class CLISandboxBackend(BaseSandboxBackend): + command: str + + def __init__(self, logger: logging.Logger, command: str, backend_name: str): + super().__init__(logger) + self.command = command + self.name = backend_name + + async def is_available(self) -> bool: + if shutil.which(self.command) is None: + return False + + result = await self._run_command([self.command, 'info'], timeout_sec=5, check=False) + return result.return_code == 0 and not result.timed_out + + async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: + validate_sandbox_security(spec) + + now = dt.datetime.now(dt.UTC) + container_name = self._build_container_name(spec.session_id) + + args = [ + self.command, + 'run', + '-d', + '--rm', + '--name', + container_name, + '--label', + 'langbot.box=true', + '--label', + f'langbot.session_id={spec.session_id}', + '--label', + f'langbot.box.instance_id={self.instance_id}', + ] + + if spec.network == BoxNetworkMode.OFF: + args.extend(['--network', 'none']) + + # Resource limits + args.extend(['--cpus', str(spec.cpus)]) + args.extend(['--memory', f'{spec.memory_mb}m']) + args.extend(['--pids-limit', str(spec.pids_limit)]) + + if spec.read_only_rootfs: + args.append('--read-only') + args.extend(['--tmpfs', '/tmp:size=64m']) + + if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}' + args.extend(['-v', mount_spec]) + + args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done']) + + self.logger.info( + f'LangBot Box backend start_session: backend={self.name} ' + f'session_id={spec.session_id} container_name={container_name} ' + f'image={spec.image} network={spec.network.value} ' + f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} ' + f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' + f'read_only_rootfs={spec.read_only_rootfs}' + ) + + await 
self._run_command(args, timeout_sec=30, check=True) + + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=container_name, + image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + created_at=now, + last_used_at=now, + ) + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + start = dt.datetime.now(dt.UTC) + args = [self.command, 'exec'] + + for key, value in spec.env.items(): + args.extend(['-e', f'{key}={value}']) + + args.extend( + [ + session.backend_session_id, + 'sh', + '-lc', + self._build_exec_command(spec.workdir, spec.cmd), + ] + ) + + cmd_preview = spec.cmd.strip() + if len(cmd_preview) > 400: + cmd_preview = f'{cmd_preview[:397]}...' + self.logger.info( + f'LangBot Box backend exec: backend={self.name} ' + f'session_id={session.session_id} container_name={session.backend_session_id} ' + f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} ' + f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}' + ) + + result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False) + duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000) + + if result.timed_out: + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.TIMED_OUT, + exit_code=None, + stdout=result.stdout, + stderr=result.stderr or f'Command timed out after {spec.timeout_sec} seconds.', + duration_ms=duration_ms, + ) + + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=result.return_code, + stdout=result.stdout, + stderr=result.stderr, + duration_ms=duration_ms, + ) + + async def stop_session(self, session: BoxSessionInfo): + self.logger.info( + f'LangBot Box backend 
stop_session: backend={self.name} ' + f'session_id={session.session_id} container_name={session.backend_session_id}' + ) + await self._run_command( + [self.command, 'rm', '-f', session.backend_session_id], + timeout_sec=20, + check=False, + ) + + async def cleanup_orphaned_containers(self, current_instance_id: str = ''): + """Remove langbot.box containers from previous instances. + + Only removes containers whose ``langbot.box.instance_id`` label does + NOT match *current_instance_id*. Containers without the label (from + older versions) are also removed. + """ + result = await self._run_command( + [ + self.command, + 'ps', + '-a', + '--filter', + 'label=langbot.box=true', + '--format', + '{{.ID}}\t{{.Label "langbot.box.instance_id"}}', + ], + timeout_sec=10, + check=False, + ) + if result.return_code != 0 or not result.stdout.strip(): + return + orphan_ids = [] + for line in result.stdout.strip().split('\n'): + line = line.strip() + if not line: + continue + parts = line.split('\t', 1) + cid = parts[0].strip() + label_instance = parts[1].strip() if len(parts) > 1 else '' + if label_instance != current_instance_id: + orphan_ids.append(cid) + if not orphan_ids: + return + for cid in orphan_ids: + self.logger.info(f'Cleaning up orphaned Box container: {cid}') + await self._run_command( + [self.command, 'rm', '-f', *orphan_ids], + timeout_sec=30, + check=False, + ) + + async def start_managed_process(self, session: BoxSessionInfo, spec) -> asyncio.subprocess.Process: + args = [self.command, 'exec', '-i'] + + for key, value in spec.env.items(): + args.extend(['-e', f'{key}={value}']) + + args.extend( + [ + session.backend_session_id, + 'sh', + '-lc', + self._build_spawn_command(spec.cwd, spec.command, spec.args), + ] + ) + + self.logger.info( + f'LangBot Box backend start_managed_process: backend={self.name} ' + f'session_id={session.session_id} container_name={session.backend_session_id} ' + f'cwd={spec.cwd} env_keys={sorted(spec.env.keys())} command={spec.command} 
args={spec.args}' + ) + + return await asyncio.create_subprocess_exec( + *args, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + def _build_container_name(self, session_id: str) -> str: + normalized = re.sub(r'[^a-zA-Z0-9_.-]+', '-', session_id).strip('-').lower() or 'session' + suffix = uuid.uuid4().hex[:8] + return f'langbot-box-{normalized[:32]}-{suffix}' + + def _build_exec_command(self, workdir: str, cmd: str) -> str: + quoted_workdir = shlex.quote(workdir) + return f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {cmd}' + + def _build_spawn_command(self, cwd: str, command: str, args: list[str]) -> str: + quoted_cwd = shlex.quote(cwd) + command_parts = [shlex.quote(command), *[shlex.quote(arg) for arg in args]] + return f'mkdir -p {quoted_cwd} && cd {quoted_cwd} && exec {" ".join(command_parts)}' + + async def _run_command( + self, + args: list[str], + timeout_sec: int, + check: bool, + ) -> _CommandResult: + process = await asyncio.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout_task = asyncio.create_task(self._read_stream(process.stdout)) + stderr_task = asyncio.create_task(self._read_stream(process.stderr)) + + timed_out = False + try: + await asyncio.wait_for(process.wait(), timeout=timeout_sec) + except asyncio.TimeoutError: + process.kill() + timed_out = True + await process.wait() + + stdout_bytes, stdout_total = await stdout_task + stderr_bytes, stderr_total = await stderr_task + + if timed_out: + return _CommandResult( + return_code=-1, + stdout=self._clip_captured_bytes(stdout_bytes, stdout_total), + stderr=self._clip_captured_bytes(stderr_bytes, stderr_total), + timed_out=True, + ) + + stdout = self._clip_captured_bytes(stdout_bytes, stdout_total) + stderr = self._clip_captured_bytes(stderr_bytes, stderr_total) + + if check and process.returncode != 0: + raise BoxError(self._format_cli_error(stderr or stdout or 'unknown 
backend error')) + + return _CommandResult( + return_code=process.returncode, + stdout=stdout, + stderr=stderr, + timed_out=False, + ) + + @staticmethod + def _clip_captured_bytes(data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str: + text = data.decode('utf-8', errors='replace').strip() + if total_size > limit: + text += f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]' + return text + + @staticmethod + async def _read_stream( + stream: asyncio.StreamReader | None, + limit: int = _MAX_RAW_OUTPUT_BYTES, + ) -> tuple[bytes, int]: + if stream is None: + return b'', 0 + + chunks = bytearray() + total_size = 0 + while True: + chunk = await stream.read(65536) + if not chunk: + break + total_size += len(chunk) + remaining = limit - len(chunks) + if remaining > 0: + chunks.extend(chunk[:remaining]) + + return bytes(chunks), total_size + + def _format_cli_error(self, message: str) -> str: + message = ' '.join(message.split()) + if len(message) > 300: + message = f'{message[:297]}...' 
+ return f'{self.name} backend error: {message}' + + +class PodmanBackend(CLISandboxBackend): + def __init__(self, logger: logging.Logger): + super().__init__(logger=logger, command='podman', backend_name='podman') + + +class DockerBackend(CLISandboxBackend): + def __init__(self, logger: logging.Logger): + super().__init__(logger=logger, command='docker', backend_name='docker') diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py new file mode 100644 index 0000000..36a525a --- /dev/null +++ b/src/langbot_plugin/box/client.py @@ -0,0 +1,177 @@ +"""BoxRuntimeClient abstraction for Box Runtime access.""" + +from __future__ import annotations + +import abc +import logging +from typing import Any + +from langbot_plugin.runtime.io.handler import Handler + +from .actions import LangBotToBoxAction +from .errors import BoxError, BoxRuntimeUnavailableError +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxManagedProcessInfo, + BoxManagedProcessSpec, + BoxSpec, +) + + +class BoxRuntimeClient(abc.ABC): + """Abstract interface that BoxService uses to talk to a Box Runtime.""" + + @abc.abstractmethod + async def initialize(self) -> None: ... + + @abc.abstractmethod + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: ... + + @abc.abstractmethod + async def shutdown(self) -> None: ... + + @abc.abstractmethod + async def get_status(self) -> dict: ... + + @abc.abstractmethod + async def get_sessions(self) -> list[dict]: ... + + @abc.abstractmethod + async def get_backend_info(self) -> dict: ... + + @abc.abstractmethod + async def delete_session(self, session_id: str) -> None: ... + + @abc.abstractmethod + async def create_session(self, spec: BoxSpec) -> dict: ... + + @abc.abstractmethod + async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> BoxManagedProcessInfo: ... + + @abc.abstractmethod + async def get_managed_process(self, session_id: str) -> BoxManagedProcessInfo: ... 
+ + @abc.abstractmethod + async def get_session(self, session_id: str) -> dict: ... + + +def _translate_action_error(exc: Exception) -> BoxError: + """Convert an ActionCallError message back into the appropriate BoxError subclass.""" + from .errors import ( + BoxBackendUnavailableError, + BoxManagedProcessConflictError, + BoxManagedProcessNotFoundError, + BoxSessionConflictError, + BoxSessionNotFoundError, + BoxValidationError, + ) + + msg = str(exc) + _ERROR_PREFIX_MAP: list[tuple[str, type[BoxError]]] = [ + ('BoxValidationError:', BoxValidationError), + ('BoxSessionNotFoundError:', BoxSessionNotFoundError), + ('BoxSessionConflictError:', BoxSessionConflictError), + ('BoxManagedProcessNotFoundError:', BoxManagedProcessNotFoundError), + ('BoxManagedProcessConflictError:', BoxManagedProcessConflictError), + ('BoxBackendUnavailableError:', BoxBackendUnavailableError), + ] + for prefix, cls in _ERROR_PREFIX_MAP: + if prefix in msg: + return cls(msg) + return BoxError(msg) + + +class ActionRPCBoxClient(BoxRuntimeClient): + """Client that talks to BoxRuntime via the action RPC protocol.""" + + def __init__(self, logger: logging.Logger): + self._logger = logger + self._handler: Handler | None = None + + @property + def handler(self) -> Handler: + if self._handler is None: + raise BoxRuntimeUnavailableError('box runtime not connected') + return self._handler + + def set_handler(self, handler: Handler) -> None: + self._handler = handler + + async def _call(self, action: LangBotToBoxAction, data: dict[str, Any], timeout: float = 15.0) -> dict[str, Any]: + try: + return await self.handler.call_action(action, data, timeout=timeout) + except BoxRuntimeUnavailableError: + raise + except Exception as exc: + raise _translate_action_error(exc) from exc + + async def initialize(self) -> None: + try: + await self._call(LangBotToBoxAction.HEALTH, {}) + self._logger.info('LangBot Box runtime connected via action RPC.') + except Exception as exc: + raise 
BoxRuntimeUnavailableError(f'box runtime unavailable: {exc}') from exc + + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: + data = await self._call(LangBotToBoxAction.EXEC, spec.model_dump(mode='json'), timeout=300.0) + return BoxExecutionResult( + session_id=data['session_id'], + backend_name=data['backend_name'], + status=BoxExecutionStatus(data['status']), + exit_code=data.get('exit_code'), + stdout=data.get('stdout', ''), + stderr=data.get('stderr', ''), + duration_ms=data['duration_ms'], + ) + + async def shutdown(self) -> None: + if self._handler is not None: + try: + await self._call(LangBotToBoxAction.SHUTDOWN, {}) + except Exception: + pass + self._handler = None + + async def get_status(self) -> dict: + return await self._call(LangBotToBoxAction.STATUS, {}) + + async def get_sessions(self) -> list[dict]: + data = await self._call(LangBotToBoxAction.GET_SESSIONS, {}) + return data['sessions'] + + async def get_session(self, session_id: str) -> dict: + return await self._call(LangBotToBoxAction.GET_SESSION, {'session_id': session_id}) + + async def get_backend_info(self) -> dict: + return await self._call(LangBotToBoxAction.GET_BACKEND_INFO, {}) + + async def delete_session(self, session_id: str) -> None: + await self._call(LangBotToBoxAction.DELETE_SESSION, {'session_id': session_id}) + + async def create_session(self, spec: BoxSpec) -> dict: + return await self._call(LangBotToBoxAction.CREATE_SESSION, spec.model_dump(mode='json')) + + async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> BoxManagedProcessInfo: + data = await self._call( + LangBotToBoxAction.START_MANAGED_PROCESS, + {'session_id': session_id, 'spec': spec.model_dump(mode='json')}, + ) + return BoxManagedProcessInfo.model_validate(data) + + async def get_managed_process(self, session_id: str) -> BoxManagedProcessInfo: + data = await self._call(LangBotToBoxAction.GET_MANAGED_PROCESS, {'session_id': session_id}) + return 
BoxManagedProcessInfo.model_validate(data) + + def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: str) -> str: + base = ws_relay_base_url + if base.startswith('https://'): + scheme = 'wss://' + suffix = base[len('https://') :] + elif base.startswith('http://'): + scheme = 'ws://' + suffix = base[len('http://') :] + else: + scheme = 'ws://' + suffix = base + return f'{scheme}{suffix}/v1/sessions/{session_id}/managed-process/ws' diff --git a/src/langbot_plugin/box/errors.py b/src/langbot_plugin/box/errors.py new file mode 100644 index 0000000..f6a8e86 --- /dev/null +++ b/src/langbot_plugin/box/errors.py @@ -0,0 +1,33 @@ +from __future__ import annotations + + +class BoxError(RuntimeError): + """Base error for LangBot Box failures.""" + + +class BoxValidationError(BoxError): + """Raised when sandbox_exec arguments are invalid.""" + + +class BoxBackendUnavailableError(BoxError): + """Raised when no supported container backend is available.""" + + +class BoxRuntimeUnavailableError(BoxError): + """Raised when the standalone Box Runtime service is unavailable.""" + + +class BoxSessionConflictError(BoxError): + """Raised when an existing session cannot satisfy a new request.""" + + +class BoxSessionNotFoundError(BoxError): + """Raised when a referenced session does not exist.""" + + +class BoxManagedProcessConflictError(BoxError): + """Raised when a session already has an active managed process.""" + + +class BoxManagedProcessNotFoundError(BoxError): + """Raised when a referenced managed process does not exist.""" diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py new file mode 100644 index 0000000..90496ca --- /dev/null +++ b/src/langbot_plugin/box/models.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +import datetime as dt +import enum + +import pydantic + + +DEFAULT_BOX_IMAGE = 'python:3.11-slim' +DEFAULT_BOX_MOUNT_PATH = '/workspace' + + +class BoxNetworkMode(str, enum.Enum): + OFF = 'off' + ON = 
'on' + + +class BoxExecutionStatus(str, enum.Enum): + COMPLETED = 'completed' + TIMED_OUT = 'timed_out' + + +class BoxHostMountMode(str, enum.Enum): + NONE = 'none' + READ_ONLY = 'ro' + READ_WRITE = 'rw' + + +class BoxManagedProcessStatus(str, enum.Enum): + RUNNING = 'running' + EXITED = 'exited' + + +class BoxSpec(pydantic.BaseModel): + cmd: str = '' + workdir: str = '/workspace' + timeout_sec: int = 30 + network: BoxNetworkMode = BoxNetworkMode.OFF + session_id: str + env: dict[str, str] = pydantic.Field(default_factory=dict) + image: str = DEFAULT_BOX_IMAGE + host_path: str | None = None + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + + @pydantic.field_validator('cmd') + @classmethod + def validate_cmd(cls, value: str) -> str: + return value.strip() + + @pydantic.field_validator('workdir') + @classmethod + def validate_workdir(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('workdir must be an absolute path inside the sandbox') + return value + + @pydantic.field_validator('timeout_sec') + @classmethod + def validate_timeout_sec(cls, value: int) -> int: + if value <= 0: + raise ValueError('timeout_sec must be greater than 0') + return value + + @pydantic.field_validator('cpus') + @classmethod + def validate_cpus(cls, value: float) -> float: + if value <= 0: + raise ValueError('cpus must be greater than 0') + return value + + @pydantic.field_validator('memory_mb') + @classmethod + def validate_memory_mb(cls, value: int) -> int: + if value < 32: + raise ValueError('memory_mb must be at least 32') + return value + + @pydantic.field_validator('pids_limit') + @classmethod + def validate_pids_limit(cls, value: int) -> int: + if value < 1: + raise ValueError('pids_limit must be at least 1') + return value + + @pydantic.field_validator('session_id') + @classmethod + def 
validate_session_id(cls, value: str) -> str: + value = value.strip() + if not value: + raise ValueError('session_id must not be empty') + return value + + @pydantic.field_validator('env') + @classmethod + def validate_env(cls, value: dict[str, str]) -> dict[str, str]: + return {str(k): str(v) for k, v in value.items()} + + @pydantic.field_validator('host_path') + @classmethod + def validate_host_path(cls, value: str | None) -> str | None: + if value is None: + return None + value = value.strip() + if not value.startswith('/'): + raise ValueError('host_path must be an absolute host path') + return value + + @pydantic.model_validator(mode='after') + def validate_host_mount_consistency(self) -> 'BoxSpec': + if self.host_path is None: + return self + if self.host_path_mode == BoxHostMountMode.NONE: + return self + if not self.workdir.startswith(DEFAULT_BOX_MOUNT_PATH): + raise ValueError('workdir must stay under /workspace when host_path is provided') + return self + + +class BoxProfile(pydantic.BaseModel): + """Preset sandbox configuration. + + Provides default values for BoxSpec fields and optionally locks fields + so that tool-call parameters cannot override them. 
+ """ + + name: str + image: str = DEFAULT_BOX_IMAGE + network: BoxNetworkMode = BoxNetworkMode.OFF + timeout_sec: int = 30 + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + max_timeout_sec: int = 120 + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + locked: frozenset[str] = frozenset() + + model_config = pydantic.ConfigDict(frozen=True) + + +BUILTIN_PROFILES: dict[str, BoxProfile] = { + 'default': BoxProfile( + name='default', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'offline_readonly': BoxProfile( + name='offline_readonly', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_ONLY, + cpus=0.5, + memory_mb=256, + pids_limit=64, + read_only_rootfs=True, + max_timeout_sec=60, + locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}), + ), + 'network_basic': BoxProfile( + name='network_basic', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'network_extended': BoxProfile( + name='network_extended', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=2.0, + memory_mb=1024, + pids_limit=256, + read_only_rootfs=False, + max_timeout_sec=300, + ), +} + + +class BoxSessionInfo(pydantic.BaseModel): + session_id: str + backend_name: str + backend_session_id: str + image: str + network: BoxNetworkMode + host_path: str | None = None + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + created_at: dt.datetime + last_used_at: dt.datetime + + +class BoxManagedProcessSpec(pydantic.BaseModel): + command: str + args: list[str] = pydantic.Field(default_factory=list) + 
env: dict[str, str] = pydantic.Field(default_factory=dict) + cwd: str = '/workspace' + + @pydantic.field_validator('command') + @classmethod + def validate_command(cls, value: str) -> str: + value = value.strip() + if not value: + raise ValueError('command must not be empty') + return value + + @pydantic.field_validator('args') + @classmethod + def validate_args(cls, value: list[str]) -> list[str]: + return [str(item) for item in value] + + @pydantic.field_validator('env') + @classmethod + def validate_env(cls, value: dict[str, str]) -> dict[str, str]: + return {str(k): str(v) for k, v in value.items()} + + @pydantic.field_validator('cwd') + @classmethod + def validate_cwd(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('cwd must be an absolute path inside the sandbox') + return value + + +class BoxManagedProcessInfo(pydantic.BaseModel): + session_id: str + status: BoxManagedProcessStatus + command: str + args: list[str] + cwd: str + env_keys: list[str] + attached: bool = False + started_at: dt.datetime + exited_at: dt.datetime | None = None + exit_code: int | None = None + stderr_preview: str = '' + + +class BoxExecutionResult(pydantic.BaseModel): + session_id: str + backend_name: str + status: BoxExecutionStatus + exit_code: int | None + stdout: str = '' + stderr: str = '' + duration_ms: int + + @property + def ok(self) -> bool: + return self.status == BoxExecutionStatus.COMPLETED and self.exit_code == 0 diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py new file mode 100644 index 0000000..36f8c13 --- /dev/null +++ b/src/langbot_plugin/box/runtime.py @@ -0,0 +1,386 @@ +from __future__ import annotations + +import asyncio +import collections +import dataclasses +import datetime as dt +import logging +import uuid + +from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend +from .errors import ( + BoxBackendUnavailableError, + BoxManagedProcessConflictError, + 
BoxManagedProcessNotFoundError, + BoxSessionConflictError, + BoxSessionNotFoundError, + BoxValidationError, +) +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxManagedProcessInfo, + BoxManagedProcessSpec, + BoxManagedProcessStatus, + BoxSessionInfo, + BoxSpec, +) + +_UTC = dt.timezone.utc +_MANAGED_PROCESS_STDERR_PREVIEW_LIMIT = 4000 + + +@dataclasses.dataclass(slots=True) +class _ManagedProcess: + spec: BoxManagedProcessSpec + process: asyncio.subprocess.Process + started_at: dt.datetime + attach_lock: asyncio.Lock + stderr_chunks: collections.deque[str] + stderr_total_len: int = 0 + exit_code: int | None = None + exited_at: dt.datetime | None = None + + @property + def is_running(self) -> bool: + return self.exit_code is None and self.process.returncode is None + + +@dataclasses.dataclass(slots=True) +class _RuntimeSession: + info: BoxSessionInfo + lock: asyncio.Lock + managed_process: _ManagedProcess | None = None + + +class BoxRuntime: + def __init__( + self, + logger: logging.Logger, + backends: list[BaseSandboxBackend] | None = None, + session_ttl_sec: int = 300, + ): + self.logger = logger + self.backends = backends or [PodmanBackend(logger), DockerBackend(logger)] + self.session_ttl_sec = session_ttl_sec + self._backend: BaseSandboxBackend | None = None + self._sessions: dict[str, _RuntimeSession] = {} + self._lock = asyncio.Lock() + self.instance_id = uuid.uuid4().hex[:12] + + async def initialize(self): + self._backend = await self._select_backend() + if self._backend is not None: + self._backend.instance_id = self.instance_id + try: + await self._backend.cleanup_orphaned_containers(self.instance_id) + except Exception as exc: + self.logger.warning(f'LangBot Box orphan container cleanup failed: {exc}') + + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: + if not spec.cmd: + raise BoxValidationError('cmd must not be empty') + session = await self._get_or_create_session(spec) + + async with session.lock: + 
self.logger.info( + 'LangBot Box execute: ' + f'session_id={spec.session_id} ' + f'backend_session_id={session.info.backend_session_id} ' + f'backend={session.info.backend_name} ' + f'workdir={spec.workdir} ' + f'timeout_sec={spec.timeout_sec}' + ) + result = await (await self._get_backend()).exec(session.info, spec) + + async with self._lock: + now = dt.datetime.now(_UTC) + if spec.session_id in self._sessions: + self._sessions[spec.session_id].info.last_used_at = now + + if result.status == BoxExecutionStatus.TIMED_OUT: + await self._drop_session_locked(spec.session_id) + + return result + + async def shutdown(self): + async with self._lock: + session_ids = list(self._sessions.keys()) + for session_id in session_ids: + await self._drop_session_locked(session_id) + + async def create_session(self, spec: BoxSpec) -> dict: + session = await self._get_or_create_session(spec) + return self._session_to_dict(session.info) + + async def delete_session(self, session_id: str) -> None: + async with self._lock: + if session_id not in self._sessions: + raise BoxSessionNotFoundError(f'session {session_id} not found') + await self._drop_session_locked(session_id) + + async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> dict: + async with self._lock: + runtime_session = self._sessions.get(session_id) + if runtime_session is None: + raise BoxSessionNotFoundError(f'session {session_id} not found') + + async with runtime_session.lock: + existing = runtime_session.managed_process + if existing is not None and existing.is_running: + raise BoxManagedProcessConflictError(f'session {session_id} already has a managed process') + + backend = await self._get_backend() + process = await backend.start_managed_process(runtime_session.info, spec) + managed_process = _ManagedProcess( + spec=spec, + process=process, + started_at=dt.datetime.now(_UTC), + attach_lock=asyncio.Lock(), + stderr_chunks=collections.deque(), + ) + runtime_session.managed_process = 
async def get_backend_info(self) -> dict:
    """Describe the selected backend for health and status reporting.

    Returns:
        ``{'name': None, 'available': False}`` when no backend has been
        selected, otherwise the backend's name together with the result of
        its ``is_available()`` probe.

    A probe that raises is reported as unavailable. The failure is logged so
    it is not lost, but it never propagates to the caller.
    """
    backend = self._backend
    if backend is None:
        return {'name': None, 'available': False}

    try:
        available = await backend.is_available()
    except Exception as exc:
        # Best effort: health reporting must not fail because the probe did.
        self.logger.warning(f'LangBot Box backend {backend.name} availability check failed: {exc}')
        available = False
    return {'name': backend.name, 'available': available}
async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession:
    """Return the session registered under ``spec.session_id``, creating it if needed.

    Everything happens under ``self._lock``. Expired sessions are reaped
    first. An existing session is checked with ``_assert_session_compatible``
    and has its ``last_used_at`` refreshed; a missing one is started on the
    backend returned by ``_get_backend`` and registered.
    """
    async with self._lock:
        await self._reap_expired_sessions_locked()

        session = self._sessions.get(spec.session_id)
        if session is None:
            # Nothing registered under this id yet: start a fresh backend session.
            backend = await self._get_backend()
            info = await backend.start_session(spec)
            session = _RuntimeSession(info=info, lock=asyncio.Lock())
            self._sessions[spec.session_id] = session
            self.logger.info(
                'LangBot Box session created: '
                f'session_id={spec.session_id} '
                f'backend_session_id={info.backend_session_id} '
                f'backend={info.backend_name} '
                f'image={info.image} '
                f'network={info.network.value} '
                f'host_path={info.host_path} '
                f'host_path_mode={info.host_path_mode.value}'
            )
            return session

        # Reuse path: the request must match how the session was created.
        self._assert_session_compatible(session.info, spec)
        session.info.last_used_at = dt.datetime.now(_UTC)
        self.logger.info(
            'LangBot Box session reused: '
            f'session_id={spec.session_id} '
            f'backend_session_id={session.info.backend_session_id} '
            f'backend={session.info.backend_name}'
        )
        return session
async def _select_backend(self) -> BaseSandboxBackend | None:
    """Probe ``self.backends`` in order and return the first usable one.

    A backend is usable when ``initialize()`` succeeds and ``is_available()``
    returns a truthy value. A backend that raises during the probe is logged
    and skipped.

    Returns:
        The selected backend, or ``None`` when none of them is ready.
    """
    for backend in self.backends:
        try:
            await backend.initialize()
            if await backend.is_available():
                self.logger.info(f'LangBot Box using backend: {backend.name}')
                return backend
        except Exception as exc:
            self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}')

    # The backend list is injectable, so report what was actually probed
    # instead of assuming the Podman/Docker defaults.
    tried = ', '.join(backend.name for backend in self.backends) or 'none configured'
    self.logger.warning(f'LangBot Box backend unavailable: no configured backend is ready (tried: {tried})')
    return None
async def _drain_managed_process_stderr(self, session_id: str, managed_process: _ManagedProcess) -> None:
    """Consume the managed process's stderr until EOF, keeping a bounded preview.

    Every non-blank line is logged and appended to
    ``managed_process.stderr_chunks``. Older lines are evicted once the
    accounted size exceeds ``_MANAGED_PROCESS_STDERR_PREVIEW_LIMIT``.

    Two situations are handled explicitly:

    * A line longer than the stream reader's buffer limit makes
      ``readline()`` raise ``ValueError``. The reader has already discarded
      the oversized data, so draining continues; stopping would let the
      child block once its stderr pipe fills up.
    * A single line longer than the preview limit is stored truncated, so it
      cannot evict itself and leave the preview empty.

    Any other failure ends draining and is logged as a warning.
    """
    stream = managed_process.process.stderr
    if stream is None:
        return

    # Each stored chunk is accounted as len(chunk) + 1 (the '\n' separator),
    # so a chunk of at most LIMIT - 1 characters always fits on its own.
    max_chunk_len = max(_MANAGED_PROCESS_STDERR_PREVIEW_LIMIT - 1, 0)

    try:
        while True:
            try:
                chunk = await stream.readline()
            except ValueError as exc:
                # Only swallow the "line exceeds buffer limit" case, which
                # asyncio raises while handling LimitOverrunError. Anything
                # else falls through to the outer handler as before.
                if not isinstance(exc.__context__, asyncio.LimitOverrunError):
                    raise
                self.logger.warning(
                    f'LangBot Box managed process stderr line too long, skipped: session_id={session_id} {exc}'
                )
                continue
            if not chunk:
                break
            text = chunk.decode('utf-8', errors='replace').rstrip()
            if not text:
                continue
            preview = text[:max_chunk_len]
            managed_process.stderr_chunks.append(preview)
            managed_process.stderr_total_len += len(preview) + 1  # +1 for '\n' separator
            while (
                managed_process.stderr_total_len > _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT
                and managed_process.stderr_chunks
            ):
                removed = managed_process.stderr_chunks.popleft()
                managed_process.stderr_total_len -= len(removed) + 1
            self.logger.info(f'LangBot Box managed process stderr: session_id={session_id} {text}')
    except Exception as exc:
        self.logger.warning(f'Failed to drain managed process stderr for {session_id}: {exc}')
# Host locations that must never be reachable through a sandbox mount.
BLOCKED_HOST_PATHS = frozenset(
    {
        '/etc',
        '/proc',
        '/sys',
        '/dev',
        '/root',
        '/boot',
        '/run',
        '/var/run',
        '/run/docker.sock',
        '/var/run/docker.sock',
        '/run/podman',
        '/var/run/podman',
    }
)


def validate_sandbox_security(spec: BoxSpec) -> None:
    """Validate that a BoxSpec does not request dangerous container config.

    ``spec.host_path`` is resolved with ``os.path.realpath`` and rejected when:

    * it is a blocked path or lies inside one, or
    * it is an ancestor of a blocked path (for example ``/`` or ``/var``),
      because mounting it would expose the blocked location anyway.

    Blocked entries are compared both literally and in resolved form, so a
    system directory that is itself a symlink is still recognised.

    Raises:
        BoxValidationError: when ``spec.host_path`` is blocked.
    """
    if not spec.host_path:
        return

    real = os.path.realpath(spec.host_path)
    blocked_paths = set(BLOCKED_HOST_PATHS)
    blocked_paths.update(os.path.realpath(path) for path in BLOCKED_HOST_PATHS)

    # '/' must become the prefix '/', not '//'.
    real_prefix = real.rstrip('/') + '/'
    for blocked in blocked_paths:
        inside_blocked = real == blocked or real.startswith(blocked + '/')
        exposes_blocked = blocked.startswith(real_prefix)
        if inside_blocked or exposes_blocked:
            raise BoxValidationError(f'host_path {spec.host_path} is blocked for security')
@self.action(CommonAction.PING) + async def ping(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success({}) + + @self.action(LangBotToBoxAction.HEALTH) + async def health(data: dict[str, Any]) -> ActionResponse: + info = await self._runtime.get_backend_info() + return ActionResponse.success(info) + + @self.action(LangBotToBoxAction.STATUS) + async def status(data: dict[str, Any]) -> ActionResponse: + result = await self._runtime.get_status() + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.EXEC) + async def exec_cmd(data: dict[str, Any]) -> ActionResponse: + try: + spec = BoxSpec.model_validate(data) + except pydantic.ValidationError as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + result = await self._runtime.execute(spec) + return ActionResponse.success(_result_to_dict(result)) + + @self.action(LangBotToBoxAction.CREATE_SESSION) + async def create_session(data: dict[str, Any]) -> ActionResponse: + try: + spec = BoxSpec.model_validate(data) + except pydantic.ValidationError as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + info = await self._runtime.create_session(spec) + return ActionResponse.success(info) + + @self.action(LangBotToBoxAction.GET_SESSION) + async def get_session(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success(self._runtime.get_session(data['session_id'])) + + @self.action(LangBotToBoxAction.GET_SESSIONS) + async def get_sessions(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success({'sessions': self._runtime.get_sessions()}) + + @self.action(LangBotToBoxAction.DELETE_SESSION) + async def delete_session(data: dict[str, Any]) -> ActionResponse: + await self._runtime.delete_session(data['session_id']) + return ActionResponse.success({'deleted': data['session_id']}) + + @self.action(LangBotToBoxAction.START_MANAGED_PROCESS) + async def start_managed_process(data: dict[str, Any]) -> ActionResponse: + session_id = 
async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse:
    """Relay a WebSocket connection to the stdio of a session's managed process.

    The session id comes from the URL. A JSON error response (HTTP 400) is
    returned when the session is unknown, has no managed process, or the
    process is no longer running. Otherwise the WebSocket is prepared and,
    while holding the process's ``attach_lock``, two pumps run concurrently:

    * process stdout -> one text frame per line
    * text frames -> process stdin, newline-terminated

    The relay ends as soon as either pump finishes. Both pumps are always
    cancelled and awaited before the socket is closed and the lock released,
    so no stray reader stays attached to the process's stdout for the next
    client.
    """
    runtime: BoxRuntime = request.app['runtime']
    session_id = request.match_info['session_id']

    runtime_session = runtime._sessions.get(session_id)
    if runtime_session is None:
        return _error_response(BoxSessionNotFoundError(f'session {session_id} not found'))

    managed_process = runtime_session.managed_process
    if managed_process is None:
        return _error_response(BoxManagedProcessNotFoundError(f'session {session_id} has no managed process'))
    if not managed_process.is_running:
        return _error_response(
            BoxManagedProcessConflictError(f'managed process in session {session_id} is not running')
        )

    ws = web.WebSocketResponse(protocols=('mcp',))
    await ws.prepare(request)

    async with managed_process.attach_lock:
        process = managed_process.process
        stdout = process.stdout
        stdin = process.stdin
        if stdout is None or stdin is None:
            await ws.close(message=b'managed process stdio unavailable')
            return ws

        async def _stdout_to_ws() -> None:
            while True:
                line = await stdout.readline()
                if not line:
                    break
                await ws.send_str(line.decode('utf-8', errors='replace').rstrip('\n'))
                runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc)

        async def _ws_to_stdin() -> None:
            async for msg in ws:
                if msg.type == web.WSMsgType.TEXT:
                    stdin.write((msg.data + '\n').encode('utf-8'))
                    await stdin.drain()
                    runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc)
                elif msg.type in (
                    web.WSMsgType.CLOSE,
                    web.WSMsgType.CLOSING,
                    web.WSMsgType.CLOSED,
                    web.WSMsgType.ERROR,
                ):
                    break

        relay_tasks = [
            asyncio.create_task(_stdout_to_ws()),
            asyncio.create_task(_ws_to_stdin()),
        ]
        try:
            done, _pending = await asyncio.wait(relay_tasks, return_when=asyncio.FIRST_COMPLETED)
            for task in done:
                # Surface a pump failure to the caller.
                task.result()
        finally:
            # Runs on normal completion, on a pump failure and when this
            # handler is cancelled: stop whichever pump is still running and
            # wait for it to unwind before releasing the attach lock.
            for task in relay_tasks:
                if not task.done():
                    task.cancel()
            await asyncio.gather(*relay_tasks, return_exceptions=True)
            await ws.close()

    return ws
+ runner: web.AppRunner | None = None + try: + ws_app = create_ws_relay_app(runtime) + runner = web.AppRunner(ws_app) + await runner.setup() + site = web.TCPSite(runner, host, port) + await site.start() + logger.info(f'Box ws relay listening on {host}:{port}') + except OSError as exc: + logger.warning(f'Box ws relay failed to bind {host}:{port}: {exc}') + logger.warning('Managed process WebSocket attach will be unavailable.') + + async def new_connection_callback(connection: Connection) -> None: + handler = BoxServerHandler(connection, runtime) + await handler.run() + + try: + if mode == 'stdio': + from langbot_plugin.runtime.io.controllers.stdio.server import StdioServerController + + ctrl = StdioServerController() + await ctrl.run(new_connection_callback) + else: + from langbot_plugin.runtime.io.controllers.ws.server import WebSocketServerController + + # Action RPC uses port+1 to avoid conflict with ws relay + rpc_port = port + 1 + logger.info(f'Box action RPC (ws) listening on {host}:{rpc_port}') + ctrl = WebSocketServerController(rpc_port) + await ctrl.run(new_connection_callback) + finally: + await runtime.shutdown() + if runner is not None: + await runner.cleanup() + + +def main() -> None: + parser = argparse.ArgumentParser(description='LangBot Box Runtime Service') + parser.add_argument('--host', default='0.0.0.0', help='Bind address') + parser.add_argument('--port', type=int, default=5410, help='Bind port (ws relay)') + parser.add_argument( + '--mode', choices=['stdio', 'ws'], default='stdio', help='Control channel transport (default: stdio)' + ) + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO, stream=sys.stderr) + asyncio.run(_run_server(args.host, args.port, args.mode)) + + +if __name__ == '__main__': + main() diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index 7f913f2..b4f388b 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -33,6 +33,10 @@ - 
[--stdio-control -s]: Use stdio for control connection - [--ws-control-port]: The port for control connection - [--ws-debug-port]: The port for debug connection + box: Run the sandbox box runtime + - [--host]: Bind address, default is 0.0.0.0 + - [--port]: Bind port for ws relay, default is 5410 + - [--mode]: Control channel transport (stdio or ws), default is stdio """ @@ -120,6 +124,19 @@ def main(): help="Skip checking and installing dependencies for all installed plugins", ) + # box command + box_parser = subparsers.add_parser("box", help="Run the sandbox box runtime") + box_parser.add_argument( + "--host", default="0.0.0.0", help="Bind address" + ) + box_parser.add_argument( + "--port", type=int, default=5410, help="Bind port (ws relay)" + ) + box_parser.add_argument( + "--mode", choices=["stdio", "ws"], default="stdio", + help="Control channel transport (default: stdio)" + ) + args = parser.parse_args() if not args.command: @@ -148,6 +165,9 @@ def main(): publish_process() case "rt": runtime_app.main(args) + case "box": + from langbot_plugin.box.server import main as box_main + box_main() case _: cli_print("unknown_command", args.command) sys.exit(1) From 873848e914e73003655e55a8a135fbd960d9d9a7 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Sun, 22 Mar 2026 07:39:55 +0000 Subject: [PATCH 02/34] refactor: add if --- src/langbot_plugin/box/__main__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/langbot_plugin/box/__main__.py b/src/langbot_plugin/box/__main__.py index c6144f0..6c41643 100644 --- a/src/langbot_plugin/box/__main__.py +++ b/src/langbot_plugin/box/__main__.py @@ -2,4 +2,6 @@ from .server import main -main() +if __name__ == "__main__": + main() + From 818bc55cb7607d25d52e35f2a0fd28b8dc5547d8 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Sun, 22 Mar 2026 14:19:34 +0000 Subject: [PATCH 03/34] feat: add box backend nsjail --- src/langbot_plugin/box/nsjail_backend.py | 506 
+++++++++++++++++++++++ src/langbot_plugin/box/runtime.py | 7 +- tests/box/__init__.py | 0 tests/box/test_nsjail_backend.py | 348 ++++++++++++++++ 4 files changed, 858 insertions(+), 3 deletions(-) create mode 100644 src/langbot_plugin/box/nsjail_backend.py create mode 100644 tests/box/__init__.py create mode 100644 tests/box/test_nsjail_backend.py diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py new file mode 100644 index 0000000..4421667 --- /dev/null +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -0,0 +1,506 @@ +from __future__ import annotations + +import asyncio +import datetime as dt +import json +import logging +import os +import pathlib +import shlex +import shutil +import signal +import uuid + +from .backend import BaseSandboxBackend, _CommandResult, _MAX_RAW_OUTPUT_BYTES +from .errors import BoxError +from .models import ( + DEFAULT_BOX_MOUNT_PATH, + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from .security import validate_sandbox_security + +# System directories to mount read-only inside the sandbox. +# Only well-known paths needed for running Python/Node/shell commands. +_READONLY_SYSTEM_MOUNTS: list[str] = [ + '/usr', + '/lib', + '/lib64', + '/bin', + '/sbin', +] + +# Specific /etc entries required for dynamic linking and TLS. +_READONLY_ETC_ENTRIES: list[str] = [ + '/etc/alternatives', + '/etc/ld.so.cache', + '/etc/ld.so.conf', + '/etc/ld.so.conf.d', + '/etc/ssl/certs', + '/etc/localtime', + '/etc/resolv.conf', # needed when network=ON +] + +_DEFAULT_BASE_DIR = '/tmp/langbot-box-nsjail' + + +class NsjailBackend(BaseSandboxBackend): + """Lightweight sandbox backend using nsjail. + + Each ``exec`` invocation spawns an independent nsjail process. Session + state (workspace files) persists via a shared host directory that is + bind-mounted into every invocation. 
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
    """Create the on-disk state for a new nsjail session.

    After the spec passes ``validate_sandbox_security``, a per-session
    directory is created under ``self._base_dir`` with ``workspace``,
    ``tmp`` and ``home`` sub-directories plus a ``meta.json`` describing the
    session. No process is started here.

    Returns:
        A ``BoxSessionInfo`` whose ``backend_session_id`` is the session
        directory path.
    """
    validate_sandbox_security(spec)

    # timezone.utc rather than the dt.UTC alias (only available on Python
    # 3.11+); this also matches the runtime module.
    now = dt.datetime.now(dt.timezone.utc)

    # The session id becomes part of a directory name. Neutralise path
    # separators, dots and any other special characters so the directory
    # cannot end up outside base_dir.
    safe_session_id = ''.join(ch if ch.isalnum() or ch in '-_' else '_' for ch in spec.session_id)
    session_dir_name = f'{self.instance_id}_{safe_session_id}_{uuid.uuid4().hex[:8]}'
    session_dir = self._base_dir / session_dir_name

    # Per-session writable directories.
    workspace_dir = session_dir / 'workspace'
    tmp_dir = session_dir / 'tmp'
    home_dir = session_dir / 'home'

    for d in (workspace_dir, tmp_dir, home_dir):
        d.mkdir(parents=True, exist_ok=True)

    # If host_path is specified, it is mounted instead of the per-session
    # workspace when the nsjail arguments are built.
    meta = {
        'session_id': spec.session_id,
        'instance_id': self.instance_id,
        'host_path': spec.host_path,
        'host_path_mode': spec.host_path_mode.value if spec.host_path else None,
        'network': spec.network.value,
        'cpus': spec.cpus,
        'memory_mb': spec.memory_mb,
        'pids_limit': spec.pids_limit,
        'created_at': now.isoformat(),
    }
    (session_dir / 'meta.json').write_text(json.dumps(meta, indent=2))

    self.logger.info(
        f'LangBot Box backend start_session: backend=nsjail '
        f'session_id={spec.session_id} session_dir={session_dir} '
        f'network={spec.network.value} '
        f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} '
        f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit}'
    )

    return BoxSessionInfo(
        session_id=spec.session_id,
        backend_name=self.name,
        backend_session_id=str(session_dir),
        image='host',
        network=spec.network,
        host_path=spec.host_path,
        host_path_mode=spec.host_path_mode,
        cpus=spec.cpus,
        memory_mb=spec.memory_mb,
        pids_limit=spec.pids_limit,
        read_only_rootfs=True,  # always true for nsjail
        created_at=now,
        last_used_at=now,
    )
async def stop_session(self, session: BoxSessionInfo):
    """Tear down an nsjail session and delete its directory.

    ``session.backend_session_id`` is the session directory path. Processes
    are stopped through ``_kill_session_processes`` first. Removing the
    directory is best effort: a failure is logged, never raised.
    """
    workdir = pathlib.Path(session.backend_session_id)
    message = (
        f'LangBot Box backend stop_session: backend=nsjail '
        f'session_id={session.session_id} session_dir={workdir}'
    )
    self.logger.info(message)

    # Stop anything still tied to this session before deleting its files.
    await self._kill_session_processes(workdir)

    try:
        directory_present = workdir.exists()
        if directory_present:
            shutil.rmtree(workdir)
    except Exception as exc:
        self.logger.warning(f'Failed to remove nsjail session dir {workdir}: {exc}')
async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
    """Remove session directories left behind by other runtime instances.

    Args:
        current_instance_id: Instance whose directories must be kept. When
            empty, ``self.instance_id`` is used, so a call without arguments
            never treats this backend's own live sessions as orphaned.

    Every directory under ``self._base_dir`` that does not belong to that
    instance has its processes stopped via ``_kill_session_processes`` and is
    then deleted. A failure on one directory is logged and does not stop the
    sweep.
    """
    if not self._base_dir.exists():
        return

    instance_id = current_instance_id or self.instance_id
    # Session dirs are named <instance_id>_<session_id>_<uuid8>.
    own_prefix = f'{instance_id}_'

    for entry in self._base_dir.iterdir():
        if not entry.is_dir():
            continue

        if entry.name.startswith(own_prefix):
            continue

        self.logger.info(f'Cleaning up orphaned nsjail session dir: {entry}')
        try:
            await self._kill_session_processes(entry)
            shutil.rmtree(entry)
        except Exception as exc:
            self.logger.warning(f'Failed to clean up orphaned nsjail dir {entry}: {exc}')
def _build_readonly_mounts(self, network: BoxNetworkMode) -> list[str]:
    """Return ``--bindmount_ro`` flags for the host paths exposed read-only.

    Candidates are ``_READONLY_SYSTEM_MOUNTS`` followed by
    ``_READONLY_ETC_ENTRIES``. ``/etc/resolv.conf`` is left out when
    networking is off, and paths missing on the host are skipped.
    """
    network_off = network == BoxNetworkMode.OFF

    candidates = list(_READONLY_SYSTEM_MOUNTS)
    candidates += [
        entry
        for entry in _READONLY_ETC_ENTRIES
        # /etc/resolv.conf is only needed when network is ON.
        if not (network_off and entry == '/etc/resolv.conf')
    ]

    flags: list[str] = []
    for host_path in candidates:
        if os.path.exists(host_path):
            flags += ['--bindmount_ro', f'{host_path}:{host_path}']
    return flags
+ if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + if spec.host_path_mode == BoxHostMountMode.READ_ONLY: + args.extend(['--bindmount_ro', f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}']) + else: + args.extend(['--rw_bind', f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}']) + else: + workspace_dir = session_dir / 'workspace' + args.extend(['--rw_bind', f'{workspace_dir}:{DEFAULT_BOX_MOUNT_PATH}']) + + # /tmp and /home are always per-session writable. + tmp_dir = session_dir / 'tmp' + home_dir = session_dir / 'home' + args.extend(['--rw_bind', f'{tmp_dir}:/tmp']) + args.extend(['--rw_bind', f'{home_dir}:/home']) + + return args + + def _build_resource_limits(self, spec: BoxSpec) -> list[str]: + args: list[str] = [] + + if self._cgroup_v2_available: + # cgroup v2 – precise limits. + memory_bytes = spec.memory_mb * 1024 * 1024 + args.extend(['--cgroup_mem_max', str(memory_bytes)]) + args.extend(['--cgroup_pids_max', str(spec.pids_limit)]) + cpu_ms = int(spec.cpus * 1000) + args.extend(['--cgroup_cpu_ms_per_sec', str(cpu_ms)]) + else: + # rlimit fallback – best-effort. + args.extend(['--rlimit_as', str(spec.memory_mb)]) + args.extend(['--rlimit_nproc', str(spec.pids_limit)]) + + # Always set these rlimits regardless of cgroup mode. 
+ args.extend(['--rlimit_fsize', '512']) # max file size 512 MB + args.extend(['--rlimit_nofile', '256']) # max open fds + + return args + + # ── process execution ───────────────────────────────────────────── + + async def _run_nsjail( + self, + args: list[str], + timeout_sec: int, + ) -> _CommandResult: + process = await asyncio.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout_task = asyncio.create_task(self._read_stream(process.stdout)) + stderr_task = asyncio.create_task(self._read_stream(process.stderr)) + + timed_out = False + try: + await asyncio.wait_for(process.wait(), timeout=timeout_sec) + except asyncio.TimeoutError: + process.kill() + timed_out = True + await process.wait() + + stdout_bytes, stdout_total = await stdout_task + stderr_bytes, stderr_total = await stderr_task + + return _CommandResult( + return_code=process.returncode if not timed_out else -1, + stdout=self._clip_captured_bytes(stdout_bytes, stdout_total), + stderr=self._clip_captured_bytes(stderr_bytes, stderr_total), + timed_out=timed_out, + ) + + # ── helpers ─────────────────────────────────────────────────────── + + @staticmethod + def _detect_cgroup_v2() -> bool: + """Check whether the host runs cgroup v2 and we can write to it.""" + cgroup_mount = pathlib.Path('/sys/fs/cgroup') + if not cgroup_mount.exists(): + return False + # cgroup v2 has a single hierarchy with cgroup.controllers file. + controllers = cgroup_mount / 'cgroup.controllers' + if not controllers.exists(): + return False + # Check if we can write to a cgroup subtree (needed for nsjail). + # A rough heuristic: if the user owns a cgroup directory we're probably + # running under systemd user delegation. + user_slice = cgroup_mount / f'user.slice/user-{os.getuid()}.slice' + if user_slice.exists(): + return True + # If running as root (uid 0), cgroup v2 is always usable. 
+ if os.getuid() == 0: + return True + # Conservative: if we can't confirm writability, report unavailable. + return False + + async def _kill_session_processes(self, session_dir: pathlib.Path) -> None: + """Best-effort kill of nsjail processes associated with a session dir. + + We scan /proc for nsjail processes whose command line contains the + session directory path. + """ + session_path_str = str(session_dir) + proc_dir = pathlib.Path('/proc') + if not proc_dir.exists(): + return + + for pid_dir in proc_dir.iterdir(): + if not pid_dir.name.isdigit(): + continue + try: + cmdline = (pid_dir / 'cmdline').read_bytes().decode('utf-8', errors='replace') + if self._nsjail_bin in cmdline and session_path_str in cmdline: + pid = int(pid_dir.name) + os.kill(pid, signal.SIGKILL) + self.logger.info(f'Killed orphaned nsjail process {pid}') + except (OSError, ValueError): + continue + + @staticmethod + def _clip_captured_bytes( + data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES + ) -> str: + text = data.decode('utf-8', errors='replace').strip() + if total_size > limit: + text += f'\n... 
[raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]' + return text + + @staticmethod + async def _read_stream( + stream: asyncio.StreamReader | None, + limit: int = _MAX_RAW_OUTPUT_BYTES, + ) -> tuple[bytes, int]: + if stream is None: + return b'', 0 + + chunks = bytearray() + total_size = 0 + while True: + chunk = await stream.read(65536) + if not chunk: + break + total_size += len(chunk) + remaining = limit - len(chunks) + if remaining > 0: + chunks.extend(chunk[:remaining]) + + return bytes(chunks), total_size diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index 36f8c13..cac6b66 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -8,6 +8,7 @@ import uuid from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend +from .nsjail_backend import NsjailBackend from .errors import ( BoxBackendUnavailableError, BoxManagedProcessConflictError, @@ -61,7 +62,7 @@ def __init__( session_ttl_sec: int = 300, ): self.logger = logger - self.backends = backends or [PodmanBackend(logger), DockerBackend(logger)] + self.backends = backends or [PodmanBackend(logger), DockerBackend(logger), NsjailBackend(logger)] self.session_ttl_sec = session_ttl_sec self._backend: BaseSandboxBackend | None = None self._sessions: dict[str, _RuntimeSession] = {} @@ -227,7 +228,7 @@ async def _get_backend(self) -> BaseSandboxBackend: self._backend = await self._select_backend() if self._backend is None: raise BoxBackendUnavailableError( - 'LangBot Box backend unavailable. Install and start Podman or Docker before using sandbox_exec.' + 'LangBot Box backend unavailable. Install and start Podman, Docker, or nsjail before using sandbox_exec.' 
) return self._backend @@ -241,7 +242,7 @@ async def _select_backend(self) -> BaseSandboxBackend | None: except Exception as exc: self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}') - self.logger.warning('LangBot Box backend unavailable: neither Podman nor Docker is ready') + self.logger.warning('LangBot Box backend unavailable: no supported backend (Podman, Docker, nsjail) is ready') return None async def _reap_expired_sessions_locked(self): diff --git a/tests/box/__init__.py b/tests/box/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/box/test_nsjail_backend.py b/tests/box/test_nsjail_backend.py new file mode 100644 index 0000000..b9e7f68 --- /dev/null +++ b/tests/box/test_nsjail_backend.py @@ -0,0 +1,348 @@ +"""Unit tests for NsjailBackend. + +These tests do NOT require nsjail to be installed – they mock subprocess +calls and filesystem checks to verify argument construction, session +directory management, and cgroup detection logic. 
+""" + +from __future__ import annotations + +import asyncio +import logging +import pathlib +from unittest import mock + +import pytest + +from langbot_plugin.box.nsjail_backend import ( + NsjailBackend, + _READONLY_ETC_ENTRIES, + _READONLY_SYSTEM_MOUNTS, +) +from langbot_plugin.box.models import ( + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) + + +@pytest.fixture +def logger(): + return logging.getLogger('test.nsjail') + + +@pytest.fixture +def tmp_base(tmp_path: pathlib.Path): + return tmp_path / 'nsjail-base' + + +@pytest.fixture +def backend(logger, tmp_base): + b = NsjailBackend(logger=logger, base_dir=str(tmp_base)) + b.instance_id = 'test123' + return b + + +# ── is_available ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_is_available_no_binary(backend): + with mock.patch('shutil.which', return_value=None): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_binary_exists(backend, tmp_base): + with ( + mock.patch('shutil.which', return_value='/usr/bin/nsjail'), + mock.patch('asyncio.create_subprocess_exec') as mock_exec, + ): + mock_proc = mock.AsyncMock() + mock_proc.returncode = 0 + mock_proc.wait = mock.AsyncMock(return_value=0) + mock_exec.return_value = mock_proc + + result = await backend.is_available() + assert result is True + assert tmp_base.exists() + + +# ── start_session ───────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_creates_directories(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='sess1', cmd='echo hi') + + info = await backend.start_session(spec) + + session_dir = pathlib.Path(info.backend_session_id) + assert session_dir.exists() + assert (session_dir / 'workspace').is_dir() + assert (session_dir / 'tmp').is_dir() + assert (session_dir / 'home').is_dir() + assert (session_dir / 'meta.json').exists() 
+ + assert info.backend_name == 'nsjail' + assert info.session_id == 'sess1' + assert info.image == 'host' + assert info.read_only_rootfs is True + + +@pytest.mark.anyio +async def test_start_session_with_host_path(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec( + session_id='sess2', + cmd='ls', + host_path='/some/path', + host_path_mode=BoxHostMountMode.READ_WRITE, + ) + + info = await backend.start_session(spec) + assert info.host_path == '/some/path' + assert info.host_path_mode == BoxHostMountMode.READ_WRITE + + +# ── stop_session ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_stop_session_removes_directory(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='sess-rm', cmd='echo') + + info = await backend.start_session(spec) + session_dir = pathlib.Path(info.backend_session_id) + assert session_dir.exists() + + await backend.stop_session(info) + assert not session_dir.exists() + + +# ── nsjail argument construction ────────────────────────────────────── + +def test_build_nsjail_args_basic(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_session' + for d in ('workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s1', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='s1', cmd='echo hello', env={'FOO': 'bar'}) + + args = backend._build_nsjail_args(session, spec, session_dir) + + assert args[0] == 'nsjail' + assert '--mode' in args + assert args[args.index('--mode') + 1] == 'o' + assert '--clone_newnet' in args + assert '--disable_clone_newnet' not in args + assert '--really_quiet' in args + + # Writable mounts should reference session directories. 
+ rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--rw_bind'] + workspace_mount = f'{session_dir}/workspace:/workspace' + assert workspace_mount in rw_binds + + # Custom env should be present. + env_values = [args[i + 1] for i, a in enumerate(args) if a == '--env'] + assert 'FOO=bar' in env_values + + # Command is the last part after '--'. + separator_idx = args.index('--') + assert args[separator_idx + 1] == 'sh' + + +def test_build_nsjail_args_network_on(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_session_net' + for d in ('workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s2', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.ON, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='s2', cmd='curl http://example.com', network=BoxNetworkMode.ON) + + args = backend._build_nsjail_args(session, spec, session_dir) + + assert '--disable_clone_newnet' in args + assert '--clone_newnet' not in args + + +def test_build_nsjail_args_host_path_ro(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_hp' + for d in ('workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s3', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_ONLY, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s3', + cmd='ls', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_ONLY, + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + ro_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount_ro'] + assert 
'/data/project:/workspace' in ro_binds + + +def test_build_resource_limits_cgroup(backend): + backend._cgroup_v2_available = True + spec = BoxSpec(session_id='s', cmd='x', cpus=2.0, memory_mb=1024, pids_limit=256) + + args = backend._build_resource_limits(spec) + + assert '--cgroup_mem_max' in args + mem_idx = args.index('--cgroup_mem_max') + assert args[mem_idx + 1] == str(1024 * 1024 * 1024) + + pids_idx = args.index('--cgroup_pids_max') + assert args[pids_idx + 1] == '256' + + cpu_idx = args.index('--cgroup_cpu_ms_per_sec') + assert args[cpu_idx + 1] == '2000' + + +def test_build_resource_limits_rlimit_fallback(backend): + backend._cgroup_v2_available = False + spec = BoxSpec(session_id='s', cmd='x', memory_mb=512, pids_limit=128) + + args = backend._build_resource_limits(spec) + + assert '--rlimit_as' in args + as_idx = args.index('--rlimit_as') + assert args[as_idx + 1] == '512' + + nproc_idx = args.index('--rlimit_nproc') + assert args[nproc_idx + 1] == '128' + + # cgroup flags should NOT be present. 
+ assert '--cgroup_mem_max' not in args + + +# ── exec ────────────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_exec_success(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='exec1', cmd='echo hello') + info = await backend.start_session(spec) + + with mock.patch.object(backend, '_run_nsjail') as mock_run: + from langbot_plugin.box.backend import _CommandResult + mock_run.return_value = _CommandResult( + return_code=0, stdout='hello\n', stderr='', timed_out=False + ) + + result = await backend.exec(info, spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert result.stdout == 'hello\n' + assert result.backend_name == 'nsjail' + + +@pytest.mark.anyio +async def test_exec_timeout(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + spec = BoxSpec(session_id='exec2', cmd='sleep 100', timeout_sec=1) + info = await backend.start_session(spec) + + with mock.patch.object(backend, '_run_nsjail') as mock_run: + from langbot_plugin.box.backend import _CommandResult + mock_run.return_value = _CommandResult( + return_code=-1, stdout='', stderr='', timed_out=True + ) + + result = await backend.exec(info, spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + + +# ── cgroup detection ────────────────────────────────────────────────── + +def test_detect_cgroup_v2_no_mount(): + with mock.patch.object(pathlib.Path, 'exists', return_value=False): + assert NsjailBackend._detect_cgroup_v2() is False + + +def test_detect_cgroup_v2_root_user(): + orig_exists = pathlib.Path.exists + + def always_exists(self): + return True + + with ( + mock.patch('os.getuid', return_value=0), + mock.patch.object(pathlib.Path, 'exists', always_exists), + ): + assert NsjailBackend._detect_cgroup_v2() is True + + +# ── cleanup_orphaned_containers ─────────────────────────────────────── + +@pytest.mark.anyio +async 
def test_cleanup_orphaned_removes_old_sessions(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + + # Create a dir from a different instance. + old_dir = tmp_base / 'oldinst_sess1_abc' + old_dir.mkdir() + (old_dir / 'workspace').mkdir() + + # Create a dir from current instance. + current_dir = tmp_base / 'test123_sess2_def' + current_dir.mkdir() + (current_dir / 'workspace').mkdir() + + with mock.patch.object(backend, '_kill_session_processes', new_callable=mock.AsyncMock): + await backend.cleanup_orphaned_containers('test123') + + assert not old_dir.exists() + assert current_dir.exists() + + +# ── output clipping ────────────────────────────────────────────────── + +def test_clip_captured_bytes_within_limit(): + data = b'hello world' + result = NsjailBackend._clip_captured_bytes(data, len(data)) + assert result == 'hello world' + + +def test_clip_captured_bytes_exceeds_limit(): + data = b'hello' + result = NsjailBackend._clip_captured_bytes(data, 2_000_000, limit=1_000_000) + assert 'clipped' in result + assert '1000000' in result From c0e30968bd22051634165c1e7480b2215280d4a0 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Sun, 22 Mar 2026 15:06:29 +0000 Subject: [PATCH 04/34] refactor: use unified logging config in box server --- src/langbot_plugin/box/server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index 0690b8f..e704500 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -23,6 +23,7 @@ from langbot_plugin.entities.io.resp import ActionResponse from langbot_plugin.runtime.io.connection import Connection from langbot_plugin.runtime.io.handler import Handler +from langbot_plugin.utils.log import configure_process_logging from .actions import LangBotToBoxAction from .errors import ( @@ -259,7 +260,7 @@ def main() -> None: ) args = parser.parse_args() - logging.basicConfig(level=logging.INFO, 
stream=sys.stderr) + configure_process_logging(stream=sys.stderr) asyncio.run(_run_server(args.host, args.port, args.mode)) From ef7f3546bc2ace0fe46810c4457888f04df863f6 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Tue, 24 Mar 2026 02:38:20 +0000 Subject: [PATCH 05/34] feat(box): support configurable sandbox mount paths - add mount_path to box specs and session metadata - mount host paths at spec.mount_path instead of hard-coded /workspace - default workdir/cwd from mount_path when omitted - update runtime/backend logging and nsjail tests for custom mount paths --- src/langbot_plugin/box/backend.py | 12 ++++---- src/langbot_plugin/box/models.py | 31 +++++++++++++++++--- src/langbot_plugin/box/nsjail_backend.py | 18 ++++++------ src/langbot_plugin/box/runtime.py | 4 ++- tests/box/test_nsjail_backend.py | 36 ++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 19 deletions(-) diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py index e5bbe56..40b6a67 100644 --- a/src/langbot_plugin/box/backend.py +++ b/src/langbot_plugin/box/backend.py @@ -12,7 +12,6 @@ from .errors import BoxError from .models import ( - DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxHostMountMode, @@ -89,7 +88,7 @@ async def is_available(self) -> bool: async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: validate_sandbox_security(spec) - now = dt.datetime.now(dt.UTC) + now = dt.datetime.now(dt.timezone.utc) container_name = self._build_container_name(spec.session_id) args = [ @@ -120,7 +119,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: args.extend(['--tmpfs', '/tmp:size=64m']) if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: - mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}' + mount_spec = f'{spec.host_path}:{spec.mount_path}:{spec.host_path_mode.value}' args.extend(['-v', mount_spec]) args.extend([spec.image, 'sh', 
'-lc', 'while true; do sleep 3600; done']) @@ -129,7 +128,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: f'LangBot Box backend start_session: backend={self.name} ' f'session_id={spec.session_id} container_name={container_name} ' f'image={spec.image} network={spec.network.value} ' - f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} ' + f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} ' f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' f'read_only_rootfs={spec.read_only_rootfs}' ) @@ -144,6 +143,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, cpus=spec.cpus, memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, @@ -153,7 +153,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: ) async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: - start = dt.datetime.now(dt.UTC) + start = dt.datetime.now(dt.timezone.utc) args = [self.command, 'exec'] for key, value in spec.env.items(): @@ -179,7 +179,7 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu ) result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False) - duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000) + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) if result.timed_out: return BoxExecutionResult( diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py index 90496ca..8de58dc 100644 --- a/src/langbot_plugin/box/models.py +++ b/src/langbot_plugin/box/models.py @@ -33,7 +33,7 @@ class BoxManagedProcessStatus(str, enum.Enum): class BoxSpec(pydantic.BaseModel): cmd: str = '' - workdir: str = '/workspace' + workdir: str = DEFAULT_BOX_MOUNT_PATH timeout_sec: int = 30 network: 
BoxNetworkMode = BoxNetworkMode.OFF session_id: str @@ -41,12 +41,26 @@ class BoxSpec(pydantic.BaseModel): image: str = DEFAULT_BOX_IMAGE host_path: str | None = None host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + mount_path: str = DEFAULT_BOX_MOUNT_PATH # Resource limits cpus: float = 1.0 memory_mb: int = 512 pids_limit: int = 128 read_only_rootfs: bool = True + @pydantic.model_validator(mode='before') + @classmethod + def populate_workdir_from_mount_path(cls, data): + if not isinstance(data, dict): + return data + if data.get('workdir') not in (None, ''): + return data + mount_path = data.get('mount_path') + if isinstance(mount_path, str) and mount_path.strip(): + data = dict(data) + data['workdir'] = mount_path + return data + @pydantic.field_validator('cmd') @classmethod def validate_cmd(cls, value: str) -> str: @@ -111,14 +125,22 @@ def validate_host_path(cls, value: str | None) -> str | None: raise ValueError('host_path must be an absolute host path') return value + @pydantic.field_validator('mount_path') + @classmethod + def validate_mount_path(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('mount_path must be an absolute path inside the sandbox') + return value + @pydantic.model_validator(mode='after') def validate_host_mount_consistency(self) -> 'BoxSpec': if self.host_path is None: return self if self.host_path_mode == BoxHostMountMode.NONE: return self - if not self.workdir.startswith(DEFAULT_BOX_MOUNT_PATH): - raise ValueError('workdir must stay under /workspace when host_path is provided') + if self.workdir != self.mount_path and not self.workdir.startswith(f'{self.mount_path}/'): + raise ValueError('workdir must stay under mount_path when host_path is provided') return self @@ -198,6 +220,7 @@ class BoxSessionInfo(pydantic.BaseModel): network: BoxNetworkMode host_path: str | None = None host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + mount_path: str = 
DEFAULT_BOX_MOUNT_PATH cpus: float = 1.0 memory_mb: int = 512 pids_limit: int = 128 @@ -210,7 +233,7 @@ class BoxManagedProcessSpec(pydantic.BaseModel): command: str args: list[str] = pydantic.Field(default_factory=list) env: dict[str, str] = pydantic.Field(default_factory=dict) - cwd: str = '/workspace' + cwd: str = DEFAULT_BOX_MOUNT_PATH @pydantic.field_validator('command') @classmethod diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index 4421667..b972c7a 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -14,7 +14,6 @@ from .backend import BaseSandboxBackend, _CommandResult, _MAX_RAW_OUTPUT_BYTES from .errors import BoxError from .models import ( - DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxHostMountMode, @@ -104,7 +103,7 @@ async def is_available(self) -> bool: async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: validate_sandbox_security(spec) - now = dt.datetime.now(dt.UTC) + now = dt.datetime.now(dt.timezone.utc) session_dir_name = f'{self.instance_id}_{spec.session_id}_{uuid.uuid4().hex[:8]}' session_dir = self._base_dir / session_dir_name @@ -123,6 +122,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: 'instance_id': self.instance_id, 'host_path': spec.host_path, 'host_path_mode': spec.host_path_mode.value if spec.host_path else None, + 'mount_path': spec.mount_path, 'network': spec.network.value, 'cpus': spec.cpus, 'memory_mb': spec.memory_mb, @@ -135,7 +135,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: f'LangBot Box backend start_session: backend=nsjail ' f'session_id={spec.session_id} session_dir={session_dir} ' f'network={spec.network.value} ' - f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} ' + f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} ' f'cpus={spec.cpus} memory_mb={spec.memory_mb} 
pids_limit={spec.pids_limit}' ) @@ -147,6 +147,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, cpus=spec.cpus, memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, @@ -156,7 +157,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: ) async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: - start = dt.datetime.now(dt.UTC) + start = dt.datetime.now(dt.timezone.utc) session_dir = pathlib.Path(session.backend_session_id) args = self._build_nsjail_args(session, spec, session_dir) @@ -172,7 +173,7 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu ) result = await self._run_nsjail(args, timeout_sec=spec.timeout_sec) - duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000) + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) if result.timed_out: return BoxExecutionResult( @@ -228,6 +229,7 @@ async def start_managed_process( env=spec.env, host_path=session.host_path, host_path_mode=session.host_path_mode, + mount_path=session.mount_path, cpus=session.cpus, memory_mb=session.memory_mb, pids_limit=session.pids_limit, @@ -359,12 +361,12 @@ def _build_writable_mounts( # Workspace mount. 
if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: if spec.host_path_mode == BoxHostMountMode.READ_ONLY: - args.extend(['--bindmount_ro', f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}']) + args.extend(['--bindmount_ro', f'{spec.host_path}:{spec.mount_path}']) else: - args.extend(['--rw_bind', f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}']) + args.extend(['--rw_bind', f'{spec.host_path}:{spec.mount_path}']) else: workspace_dir = session_dir / 'workspace' - args.extend(['--rw_bind', f'{workspace_dir}:{DEFAULT_BOX_MOUNT_PATH}']) + args.extend(['--rw_bind', f'{workspace_dir}:{spec.mount_path}']) # /tmp and /home are always per-session writable. tmp_dir = session_dir / 'tmp' diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index cac6b66..ef4d2ef 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -219,7 +219,8 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: f'image={info.image} ' f'network={info.network.value} ' f'host_path={info.host_path} ' - f'host_path_mode={info.host_path_mode.value}' + f'host_path_mode={info.host_path_mode.value} ' + f'mount_path={info.mount_path}' ) return runtime_session @@ -284,6 +285,7 @@ def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): 'image', 'host_path', 'host_path_mode', + 'mount_path', 'cpus', 'memory_mb', 'pids_limit', diff --git a/tests/box/test_nsjail_backend.py b/tests/box/test_nsjail_backend.py index b9e7f68..fca4e2b 100644 --- a/tests/box/test_nsjail_backend.py +++ b/tests/box/test_nsjail_backend.py @@ -99,11 +99,13 @@ async def test_start_session_with_host_path(backend, tmp_base): cmd='ls', host_path='/some/path', host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', ) info = await backend.start_session(spec) assert info.host_path == '/some/path' assert info.host_path_mode == BoxHostMountMode.READ_WRITE + assert info.mount_path == '/project' # ── stop_session 
────────────────────────────────────────────────────── @@ -216,6 +218,40 @@ def test_build_nsjail_args_host_path_ro(backend, tmp_base): assert '/data/project:/workspace' in ro_binds +def test_build_nsjail_args_uses_custom_mount_path(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_custom_mount' + for d in ('workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s4', + backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s4', + cmd='pwd', + workdir='/project/src', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--rw_bind'] + assert '/data/project:/project' in rw_binds + assert args[args.index('--cwd') + 1] == '/project/src' + + def test_build_resource_limits_cgroup(backend): backend._cgroup_v2_available = True spec = BoxSpec(session_id='s', cmd='x', cpus=2.0, memory_mb=1024, pids_limit=256) From cf7ec2d7fd3d5bdfff59c5bf619d1138bf04284a Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Tue, 24 Mar 2026 04:01:51 +0000 Subject: [PATCH 06/34] fix(box-runtime): terminate managed processes promptly on session deletion --- src/langbot_plugin/box/client.py | 2 +- src/langbot_plugin/box/runtime.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index 36a525a..8b8e17b 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -147,7 +147,7 @@ async def 
get_backend_info(self) -> dict: return await self._call(LangBotToBoxAction.GET_BACKEND_INFO, {}) async def delete_session(self, session_id: str) -> None: - await self._call(LangBotToBoxAction.DELETE_SESSION, {'session_id': session_id}) + await self._call(LangBotToBoxAction.DELETE_SESSION, {'session_id': session_id}, timeout=30.0) async def create_session(self, spec: BoxSpec) -> dict: return await self._call(LangBotToBoxAction.CREATE_SESSION, spec.model_dump(mode='json')) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index ef4d2ef..b09ab32 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -347,22 +347,19 @@ async def _terminate_managed_process(self, runtime_session: _RuntimeSession) -> pass try: + if process.returncode is None: + try: + process.terminate() + except ProcessLookupError: + pass await asyncio.wait_for(asyncio.shield(process.wait()), timeout=5) except asyncio.TimeoutError: if process.returncode is None: try: - process.terminate() + process.kill() except ProcessLookupError: pass - try: - await asyncio.wait_for(asyncio.shield(process.wait()), timeout=5) - except asyncio.TimeoutError: - if process.returncode is None: - try: - process.kill() - except ProcessLookupError: - pass - await process.wait() + await process.wait() finally: managed_process.exit_code = process.returncode managed_process.exited_at = dt.datetime.now(_UTC) From 31c763c5d249d654b17f9feb0da8fe2a6f74bef8 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Tue, 24 Mar 2026 06:55:44 +0000 Subject: [PATCH 07/34] fix: update doc --- src/langbot_plugin/box/errors.py | 2 +- src/langbot_plugin/box/runtime.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/langbot_plugin/box/errors.py b/src/langbot_plugin/box/errors.py index f6a8e86..ecdde7a 100644 --- a/src/langbot_plugin/box/errors.py +++ b/src/langbot_plugin/box/errors.py @@ -6,7 +6,7 @@ class BoxError(RuntimeError): class 
BoxValidationError(BoxError): - """Raised when sandbox_exec arguments are invalid.""" + """Raised when exec tool arguments are invalid.""" class BoxBackendUnavailableError(BoxError): diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index b09ab32..b91bc02 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -229,7 +229,7 @@ async def _get_backend(self) -> BaseSandboxBackend: self._backend = await self._select_backend() if self._backend is None: raise BoxBackendUnavailableError( - 'LangBot Box backend unavailable. Install and start Podman, Docker, or nsjail before using sandbox_exec.' + 'LangBot Box backend unavailable. Install and start Podman, Docker, or nsjail before using exec.' ) return self._backend @@ -297,7 +297,7 @@ def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): if session_val != spec_val: display = session_val.value if hasattr(session_val, 'value') else session_val raise BoxSessionConflictError( - f'sandbox_exec session {spec.session_id} already exists with {field}={display}' + f'Box session {spec.session_id} already exists with {field}={display}' ) async def _drain_managed_process_stderr(self, session_id: str, managed_process: _ManagedProcess) -> None: From dea5820a46ad3283662f24bb3bfcb472a395792e Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Tue, 24 Mar 2026 07:58:34 +0000 Subject: [PATCH 08/34] fix(box): repair sdk runtime entrypoints and nsjail session parity --- pyproject.toml | 1 + src/langbot_plugin/box/nsjail_backend.py | 4 ++-- src/langbot_plugin/box/server.py | 4 ++-- src/langbot_plugin/cli/__init__.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0631261..1f29456 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ authors = [ requires-python = ">=3.10" dependencies = [ "aiofiles>=24.1.0", + "aiohttp>=3.9.0", "dotenv>=0.9.9", "httpx>=0.28.1", 
"jinja2>=3.1.6", diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index b972c7a..32a3437 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -151,7 +151,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: cpus=spec.cpus, memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, - read_only_rootfs=True, # always true for nsjail + read_only_rootfs=spec.read_only_rootfs, created_at=now, last_used_at=now, ) @@ -233,7 +233,7 @@ async def start_managed_process( cpus=session.cpus, memory_mb=session.memory_mb, pids_limit=session.pids_limit, - read_only_rootfs=True, + read_only_rootfs=session.read_only_rootfs, ) args = self._build_nsjail_args(session, pseudo_spec, session_dir) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index e704500..95e5d80 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -251,14 +251,14 @@ async def new_connection_callback(connection: Connection) -> None: await runner.cleanup() -def main() -> None: +def main(argv: list[str] | None = None) -> None: parser = argparse.ArgumentParser(description='LangBot Box Runtime Service') parser.add_argument('--host', default='0.0.0.0', help='Bind address') parser.add_argument('--port', type=int, default=5410, help='Bind port (ws relay)') parser.add_argument( '--mode', choices=['stdio', 'ws'], default='stdio', help='Control channel transport (default: stdio)' ) - args = parser.parse_args() + args = parser.parse_args(argv) configure_process_logging(stream=sys.stderr) asyncio.run(_run_server(args.host, args.port, args.mode)) diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index b4f388b..ff9174c 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -167,7 +167,7 @@ def main(): runtime_app.main(args) case "box": from langbot_plugin.box.server import main as box_main 
- box_main() + box_main(sys.argv[2:]) case _: cli_print("unknown_command", args.command) sys.exit(1) From 8e63877f5f3cdc7920861eba9a4c280859ede521 Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Thu, 26 Mar 2026 10:45:24 +0000 Subject: [PATCH 09/34] feat(box): add session workspace quota enforcement and SDK quota metadata --- src/langbot_plugin/box/backend.py | 3 ++- src/langbot_plugin/box/models.py | 10 ++++++++++ src/langbot_plugin/box/nsjail_backend.py | 4 +++- src/langbot_plugin/box/runtime.py | 4 +++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py index 40b6a67..47bcafb 100644 --- a/src/langbot_plugin/box/backend.py +++ b/src/langbot_plugin/box/backend.py @@ -130,7 +130,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: f'image={spec.image} network={spec.network.value} ' f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} ' f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' - f'read_only_rootfs={spec.read_only_rootfs}' + f'read_only_rootfs={spec.read_only_rootfs} workspace_quota_mb={spec.workspace_quota_mb}' ) await self._run_command(args, timeout_sec=30, check=True) @@ -148,6 +148,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, created_at=now, last_used_at=now, ) diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py index 8de58dc..e3d8472 100644 --- a/src/langbot_plugin/box/models.py +++ b/src/langbot_plugin/box/models.py @@ -47,6 +47,7 @@ class BoxSpec(pydantic.BaseModel): memory_mb: int = 512 pids_limit: int = 128 read_only_rootfs: bool = True + workspace_quota_mb: int = 0 @pydantic.model_validator(mode='before') @classmethod @@ -102,6 +103,13 @@ def validate_pids_limit(cls, 
value: int) -> int: raise ValueError('pids_limit must be at least 1') return value + @pydantic.field_validator('workspace_quota_mb') + @classmethod + def validate_workspace_quota_mb(cls, value: int) -> int: + if value < 0: + raise ValueError('workspace_quota_mb must be greater than or equal to 0') + return value + @pydantic.field_validator('session_id') @classmethod def validate_session_id(cls, value: str) -> str: @@ -162,6 +170,7 @@ class BoxProfile(pydantic.BaseModel): memory_mb: int = 512 pids_limit: int = 128 read_only_rootfs: bool = True + workspace_quota_mb: int = 0 locked: frozenset[str] = frozenset() model_config = pydantic.ConfigDict(frozen=True) @@ -225,6 +234,7 @@ class BoxSessionInfo(pydantic.BaseModel): memory_mb: int = 512 pids_limit: int = 128 read_only_rootfs: bool = True + workspace_quota_mb: int = 0 created_at: dt.datetime last_used_at: dt.datetime diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index 32a3437..52e913e 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -136,7 +136,8 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: f'session_id={spec.session_id} session_dir={session_dir} ' f'network={spec.network.value} ' f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} ' - f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit}' + f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' + f'workspace_quota_mb={spec.workspace_quota_mb}' ) return BoxSessionInfo( @@ -152,6 +153,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, created_at=now, last_used_at=now, ) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index b91bc02..e371f07 100644 --- 
a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -220,7 +220,8 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: f'network={info.network.value} ' f'host_path={info.host_path} ' f'host_path_mode={info.host_path_mode.value} ' - f'mount_path={info.mount_path}' + f'mount_path={info.mount_path} ' + f'workspace_quota_mb={info.workspace_quota_mb}' ) return runtime_session @@ -290,6 +291,7 @@ def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): 'memory_mb', 'pids_limit', 'read_only_rootfs', + 'workspace_quota_mb', ) for field in _COMPAT_FIELDS: session_val = getattr(session, field) From 120817ad1858f71de85b1ffcd4a3cfed028e248b Mon Sep 17 00:00:00 2001 From: youhuanghe <1051233107@qq.com> Date: Thu, 9 Apr 2026 10:28:56 +0000 Subject: [PATCH 10/34] feat(box): add Windows support for Docker backend Accept Windows-style absolute paths (e.g. C:\Users\...) in host_path validation, and make security path comparison case-insensitive and separator-aware on Windows. Only the Docker backend is supported on Windows (via Docker Desktop); Podman and nsjail remain Linux-only. 
--- src/langbot_plugin/box/models.py | 4 +++- src/langbot_plugin/box/security.py | 23 +++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py index e3d8472..aa07b44 100644 --- a/src/langbot_plugin/box/models.py +++ b/src/langbot_plugin/box/models.py @@ -2,6 +2,8 @@ import datetime as dt import enum +import ntpath +import posixpath import pydantic @@ -129,7 +131,7 @@ def validate_host_path(cls, value: str | None) -> str | None: if value is None: return None value = value.strip() - if not value.startswith('/'): + if not (posixpath.isabs(value) or ntpath.isabs(value)): raise ValueError('host_path must be an absolute host path') return value diff --git a/src/langbot_plugin/box/security.py b/src/langbot_plugin/box/security.py index d5a8c51..2e8ed72 100644 --- a/src/langbot_plugin/box/security.py +++ b/src/langbot_plugin/box/security.py @@ -1,11 +1,12 @@ from __future__ import annotations import os +import sys from .errors import BoxValidationError from .models import BoxSpec -BLOCKED_HOST_PATHS = frozenset( +_BLOCKED_HOST_PATHS_POSIX = frozenset( { '/etc', '/proc', @@ -22,6 +23,22 @@ } ) +_BLOCKED_HOST_PATHS_WINDOWS = frozenset( + { + r'C:\Windows', + r'C:\Program Files', + r'C:\Program Files (x86)', + r'C:\ProgramData', + r'\\.\pipe\docker_engine', + } +) + +BLOCKED_HOST_PATHS = ( + _BLOCKED_HOST_PATHS_POSIX | _BLOCKED_HOST_PATHS_WINDOWS + if sys.platform == 'win32' + else _BLOCKED_HOST_PATHS_POSIX +) + def validate_sandbox_security(spec: BoxSpec) -> None: """Validate that a BoxSpec does not request dangerous container config. 
@@ -30,6 +47,8 @@ def validate_sandbox_security(spec: BoxSpec) -> None: """ if spec.host_path: real = os.path.realpath(spec.host_path) + sep = os.sep + _norm = os.path.normcase for blocked in BLOCKED_HOST_PATHS: - if real == blocked or real.startswith(blocked + '/'): + if _norm(real) == _norm(blocked) or _norm(real).startswith(_norm(blocked) + sep): raise BoxValidationError(f'host_path {spec.host_path} is blocked for security') From 8c71ec5fe3c0aa5ea21709a1a6e5eb72685d9bfd Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 17 Apr 2026 23:52:21 +0800 Subject: [PATCH 11/34] refactor(box): merge action RPC and WS relay into single port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the two-port scheme (5410 relay + 5411 RPC) with a single aiohttp server on port 5410, using path-based routing: /rpc/ws — Action RPC (control channel) /v1/sessions/{id}/managed-process/ws — Managed process stdio relay Add AiohttpWSConnection adapter to bridge aiohttp WebSocketResponse to the SDK Connection interface, keeping Handler/BoxServerHandler unchanged. 
--- src/langbot_plugin/box/server.py | 110 ++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 22 deletions(-) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index 95e5d80..aca5c94 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -3,8 +3,12 @@ Usage (stdio, launched by LangBot as subprocess): python -m langbot_plugin.box.server -Usage (ws + ws relay, for remote/docker mode): - python -m langbot_plugin.box.server --port 5410 +Usage (ws, for remote/docker mode): + python -m langbot_plugin.box.server --mode ws --port 5410 + +All WebSocket endpoints share a single port (default 5410): + /rpc/ws — Action RPC (control channel) + /v1/sessions/{session_id}/managed-process/ws — Managed process stdio relay """ from __future__ import annotations @@ -12,6 +16,7 @@ import argparse import asyncio import datetime as dt +import json import logging import sys from typing import Any @@ -20,6 +25,7 @@ from aiohttp import web from langbot_plugin.entities.io.actions.enums import CommonAction +from langbot_plugin.entities.io.errors import ConnectionClosedError from langbot_plugin.entities.io.resp import ActionResponse from langbot_plugin.runtime.io.connection import Connection from langbot_plugin.runtime.io.handler import Handler @@ -41,6 +47,47 @@ def _result_to_dict(result: BoxExecutionResult) -> dict: return result.model_dump(mode='json') +# ── aiohttp WebSocket → Connection adapter ─────────────────────────── + + +class AiohttpWSConnection(Connection): + """Adapt an aiohttp ``WebSocketResponse`` to the SDK ``Connection`` interface. + + This allows ``BoxServerHandler`` (and therefore ``Handler``) to work over + an aiohttp WebSocket without any changes to the handler/IO layer. 
+ """ + + def __init__(self, ws: web.WebSocketResponse) -> None: + self._ws = ws + self._send_lock = asyncio.Lock() + + async def send(self, message: str) -> None: + async with self._send_lock: + try: + await self._ws.send_str(message) + except ConnectionResetError: + raise ConnectionClosedError('Connection closed during send') + + async def receive(self) -> str: + msg = await self._ws.receive() + if msg.type == web.WSMsgType.TEXT: + return msg.data + if msg.type in ( + web.WSMsgType.CLOSE, + web.WSMsgType.CLOSING, + web.WSMsgType.CLOSED, + web.WSMsgType.ERROR, + ): + raise ConnectionClosedError('Connection closed') + raise ConnectionClosedError(f'Unexpected message type: {msg.type}') + + async def close(self) -> None: + await self._ws.close() + + +# ── BoxServerHandler ───────────────────────────────────────────────── + + class BoxServerHandler(Handler): """Server-side handler that registers box actions backed by BoxRuntime.""" @@ -122,7 +169,7 @@ async def shutdown(data: dict[str, Any]) -> ActionResponse: return ActionResponse.success({}) -# ── Managed process WebSocket relay (aiohttp) ──────────────────────── +# ── Managed process WebSocket relay ────────────────────────────────── def _error_response(exc: Exception) -> web.Response: @@ -198,10 +245,31 @@ async def _ws_to_stdin() -> None: return ws -def create_ws_relay_app(runtime: BoxRuntime) -> web.Application: - """Create a minimal aiohttp app that only serves the managed-process ws relay.""" +# ── Action RPC WebSocket handler ───────────────────────────────────── + + +async def handle_rpc_ws(request: web.Request) -> web.StreamResponse: + """Handle action RPC over a single aiohttp WebSocket connection.""" + runtime: BoxRuntime = request.app['runtime'] + + ws = web.WebSocketResponse() + await ws.prepare(request) + + connection = AiohttpWSConnection(ws) + handler = BoxServerHandler(connection, runtime) + await handler.run() + + return ws + + +# ── App factory 
────────────────────────────────────────────────────── + + +def create_app(runtime: BoxRuntime) -> web.Application: + """Create the aiohttp app with all WebSocket routes on a single port.""" app = web.Application() app['runtime'] = runtime + app.router.add_get('/rpc/ws', handle_rpc_ws) app.router.add_get('/v1/sessions/{session_id}/managed-process/ws', handle_managed_process_ws) return app @@ -213,38 +281,36 @@ async def _run_server(host: str, port: int, mode: str) -> None: runtime = BoxRuntime(logger=logger) await runtime.initialize() - # Start aiohttp for ws relay (non-fatal — managed process attach - # degrades gracefully if the port is unavailable). + # Start aiohttp — serves managed-process relay and (in ws mode) + # also the action RPC endpoint, all on the same port. runner: web.AppRunner | None = None try: - ws_app = create_ws_relay_app(runtime) + ws_app = create_app(runtime) runner = web.AppRunner(ws_app) await runner.setup() site = web.TCPSite(runner, host, port) await site.start() - logger.info(f'Box ws relay listening on {host}:{port}') + logger.info(f'Box server listening on {host}:{port}') except OSError as exc: - logger.warning(f'Box ws relay failed to bind {host}:{port}: {exc}') + logger.warning(f'Box server failed to bind {host}:{port}: {exc}') logger.warning('Managed process WebSocket attach will be unavailable.') - async def new_connection_callback(connection: Connection) -> None: - handler = BoxServerHandler(connection, runtime) - await handler.run() - try: if mode == 'stdio': from langbot_plugin.runtime.io.controllers.stdio.server import StdioServerController + async def new_connection_callback(connection: Connection) -> None: + handler = BoxServerHandler(connection, runtime) + await handler.run() + ctrl = StdioServerController() await ctrl.run(new_connection_callback) else: - from langbot_plugin.runtime.io.controllers.ws.server import WebSocketServerController - - # Action RPC uses port+1 to avoid conflict with ws relay - rpc_port = port + 1 - 
logger.info(f'Box action RPC (ws) listening on {host}:{rpc_port}') - ctrl = WebSocketServerController(rpc_port) - await ctrl.run(new_connection_callback) + # In ws mode, action RPC is served via aiohttp on /rpc/ws. + # Keep the server alive until cancelled. + logger.info(f'Box action RPC available at ws://{host}:{port}/rpc/ws') + stop_event = asyncio.Event() + await stop_event.wait() finally: await runtime.shutdown() if runner is not None: @@ -254,7 +320,7 @@ async def new_connection_callback(connection: Connection) -> None: def main(argv: list[str] | None = None) -> None: parser = argparse.ArgumentParser(description='LangBot Box Runtime Service') parser.add_argument('--host', default='0.0.0.0', help='Bind address') - parser.add_argument('--port', type=int, default=5410, help='Bind port (ws relay)') + parser.add_argument('--port', type=int, default=5410, help='Bind port') parser.add_argument( '--mode', choices=['stdio', 'ws'], default='stdio', help='Control channel transport (default: stdio)' ) From 7209d38e889b83836f926a94c777a1931e06a405 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 18 Apr 2026 22:11:06 +0800 Subject: [PATCH 12/34] feat(box): add extra_mounts support to BoxSpec for multi-mount containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add BoxMountSpec model and extra_mounts field to BoxSpec, allowing multiple bind mounts per container. Docker and nsjail backends iterate extra_mounts to append additional -v / --rw_bind flags at session creation time. Backward compatible — existing single-mount usage is unchanged. 
--- src/langbot_plugin/box/backend.py | 4 ++++ src/langbot_plugin/box/models.py | 25 ++++++++++++++++++++++++ src/langbot_plugin/box/nsjail_backend.py | 6 ++++++ 3 files changed, 35 insertions(+) diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py index 47bcafb..7ba35b0 100644 --- a/src/langbot_plugin/box/backend.py +++ b/src/langbot_plugin/box/backend.py @@ -122,6 +122,10 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: mount_spec = f'{spec.host_path}:{spec.mount_path}:{spec.host_path_mode.value}' args.extend(['-v', mount_spec]) + for mount in spec.extra_mounts: + if mount.mode != BoxHostMountMode.NONE: + args.extend(['-v', f'{mount.host_path}:{mount.mount_path}:{mount.mode.value}']) + args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done']) self.logger.info( diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py index aa07b44..97cfc34 100644 --- a/src/langbot_plugin/box/models.py +++ b/src/langbot_plugin/box/models.py @@ -33,6 +33,30 @@ class BoxManagedProcessStatus(str, enum.Enum): EXITED = 'exited' +class BoxMountSpec(pydantic.BaseModel): + """A single additional bind mount specification.""" + + host_path: str + mount_path: str + mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + + @pydantic.field_validator('host_path') + @classmethod + def validate_host_path(cls, value: str) -> str: + value = value.strip() + if not (posixpath.isabs(value) or ntpath.isabs(value)): + raise ValueError('host_path must be an absolute host path') + return value + + @pydantic.field_validator('mount_path') + @classmethod + def validate_mount_path(cls, value: str) -> str: + value = value.strip() + if not value.startswith('/'): + raise ValueError('mount_path must be an absolute path inside the sandbox') + return value + + class BoxSpec(pydantic.BaseModel): cmd: str = '' workdir: str = DEFAULT_BOX_MOUNT_PATH @@ -44,6 +68,7 @@ class BoxSpec(pydantic.BaseModel): host_path: str | None = None 
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE mount_path: str = DEFAULT_BOX_MOUNT_PATH + extra_mounts: list[BoxMountSpec] = pydantic.Field(default_factory=list) # Resource limits cpus: float = 1.0 memory_mb: int = 512 diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index 52e913e..900c5d3 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -370,6 +370,12 @@ def _build_writable_mounts( workspace_dir = session_dir / 'workspace' args.extend(['--rw_bind', f'{workspace_dir}:{spec.mount_path}']) + for mount in spec.extra_mounts: + if mount.mode == BoxHostMountMode.READ_ONLY: + args.extend(['--bindmount_ro', f'{mount.host_path}:{mount.mount_path}']) + elif mount.mode == BoxHostMountMode.READ_WRITE: + args.extend(['--rw_bind', f'{mount.host_path}:{mount.mount_path}']) + # /tmp and /home are always per-session writable. tmp_dir = session_dir / 'tmp' home_dir = session_dir / 'home' From 529088ebc78d0d2ee7e9427012c731b30abc6dc3 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Mon, 20 Apr 2026 22:22:00 +0800 Subject: [PATCH 13/34] feat(box): add shared MCP container support with persistent sessions and multi-process - Add persistent flag to BoxSpec/BoxSessionInfo to keep sessions across shutdowns - Change managed_process to managed_processes dict (keyed by process_id) - Support multiple managed processes per session for shared containers - Skip persistent sessions during shutdown and TTL reaping - Conditionally add --rm flag only for non-persistent containers - Add process_id to RPC handlers and WebSocket relay routes - Update nsjail backend to pass persistent field --- src/langbot_plugin/box/backend.py | 15 ++--- src/langbot_plugin/box/client.py | 13 +++-- src/langbot_plugin/box/models.py | 6 +- src/langbot_plugin/box/nsjail_backend.py | 1 + src/langbot_plugin/box/runtime.py | 74 ++++++++++++++---------- src/langbot_plugin/box/security.py | 2 - 
src/langbot_plugin/box/server.py | 19 ++++-- 7 files changed, 79 insertions(+), 51 deletions(-) diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py index 7ba35b0..99e6f7d 100644 --- a/src/langbot_plugin/box/backend.py +++ b/src/langbot_plugin/box/backend.py @@ -95,7 +95,12 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: self.command, 'run', '-d', - '--rm', + ] + + if not spec.persistent: + args.append('--rm') + + args.extend([ '--name', container_name, '--label', @@ -104,7 +109,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: f'langbot.session_id={spec.session_id}', '--label', f'langbot.box.instance_id={self.instance_id}', - ] + ]) if spec.network == BoxNetworkMode.OFF: args.extend(['--network', 'none']) @@ -148,6 +153,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: host_path=spec.host_path, host_path_mode=spec.host_path_mode, mount_path=spec.mount_path, + persistent=spec.persistent, cpus=spec.cpus, memory_mb=spec.memory_mb, pids_limit=spec.pids_limit, @@ -383,11 +389,6 @@ def _format_cli_error(self, message: str) -> str: return f'{self.name} backend error: {message}' -class PodmanBackend(CLISandboxBackend): - def __init__(self, logger: logging.Logger): - super().__init__(logger=logger, command='podman', backend_name='podman') - - class DockerBackend(CLISandboxBackend): def __init__(self, logger: logging.Logger): super().__init__(logger=logger, command='docker', backend_name='docker') diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index 8b8e17b..9175c59 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -50,7 +50,7 @@ async def create_session(self, spec: BoxSpec) -> dict: ... async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> BoxManagedProcessInfo: ... @abc.abstractmethod - async def get_managed_process(self, session_id: str) -> BoxManagedProcessInfo: ... 
+ async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo: ... @abc.abstractmethod async def get_session(self, session_id: str) -> dict: ... @@ -159,11 +159,14 @@ async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSp ) return BoxManagedProcessInfo.model_validate(data) - async def get_managed_process(self, session_id: str) -> BoxManagedProcessInfo: - data = await self._call(LangBotToBoxAction.GET_MANAGED_PROCESS, {'session_id': session_id}) + async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo: + data = await self._call(LangBotToBoxAction.GET_MANAGED_PROCESS, { + 'session_id': session_id, + 'process_id': process_id, + }) return BoxManagedProcessInfo.model_validate(data) - def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: str) -> str: + def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: str, process_id: str = 'default') -> str: base = ws_relay_base_url if base.startswith('https://'): scheme = 'wss://' @@ -174,4 +177,4 @@ def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: else: scheme = 'ws://' suffix = base - return f'{scheme}{suffix}/v1/sessions/{session_id}/managed-process/ws' + return f'{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws' diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py index 97cfc34..fa34e36 100644 --- a/src/langbot_plugin/box/models.py +++ b/src/langbot_plugin/box/models.py @@ -8,7 +8,7 @@ import pydantic -DEFAULT_BOX_IMAGE = 'python:3.11-slim' +DEFAULT_BOX_IMAGE = 'rockchin/langbot-sandbox:latest' DEFAULT_BOX_MOUNT_PATH = '/workspace' @@ -69,6 +69,7 @@ class BoxSpec(pydantic.BaseModel): host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE mount_path: str = DEFAULT_BOX_MOUNT_PATH extra_mounts: list[BoxMountSpec] = pydantic.Field(default_factory=list) + persistent: 
bool = False # Resource limits cpus: float = 1.0 memory_mb: int = 512 @@ -257,6 +258,7 @@ class BoxSessionInfo(pydantic.BaseModel): host_path: str | None = None host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE mount_path: str = DEFAULT_BOX_MOUNT_PATH + persistent: bool = False cpus: float = 1.0 memory_mb: int = 512 pids_limit: int = 128 @@ -267,6 +269,7 @@ class BoxSessionInfo(pydantic.BaseModel): class BoxManagedProcessSpec(pydantic.BaseModel): + process_id: str = 'default' command: str args: list[str] = pydantic.Field(default_factory=list) env: dict[str, str] = pydantic.Field(default_factory=dict) @@ -301,6 +304,7 @@ def validate_cwd(cls, value: str) -> str: class BoxManagedProcessInfo(pydantic.BaseModel): session_id: str + process_id: str = 'default' status: BoxManagedProcessStatus command: str args: list[str] diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index 900c5d3..921c8ff 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -154,6 +154,7 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: pids_limit=spec.pids_limit, read_only_rootfs=spec.read_only_rootfs, workspace_quota_mb=spec.workspace_quota_mb, + persistent=spec.persistent, created_at=now, last_used_at=now, ) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index e371f07..8509289 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -7,7 +7,7 @@ import logging import uuid -from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend +from .backend import BaseSandboxBackend, DockerBackend from .nsjail_backend import NsjailBackend from .errors import ( BoxBackendUnavailableError, @@ -51,7 +51,7 @@ def is_running(self) -> bool: class _RuntimeSession: info: BoxSessionInfo lock: asyncio.Lock - managed_process: _ManagedProcess | None = None + managed_processes: dict[str, _ManagedProcess] = 
dataclasses.field(default_factory=dict) class BoxRuntime: @@ -62,7 +62,7 @@ def __init__( session_ttl_sec: int = 300, ): self.logger = logger - self.backends = backends or [PodmanBackend(logger), DockerBackend(logger), NsjailBackend(logger)] + self.backends = backends or [DockerBackend(logger), NsjailBackend(logger)] self.session_ttl_sec = session_ttl_sec self._backend: BaseSandboxBackend | None = None self._sessions: dict[str, _RuntimeSession] = {} @@ -108,6 +108,9 @@ async def shutdown(self): async with self._lock: session_ids = list(self._sessions.keys()) for session_id in session_ids: + session = self._sessions.get(session_id) + if session is not None and session.info.persistent: + continue await self._drop_session_locked(session_id) async def create_session(self, spec: BoxSpec) -> dict: @@ -127,9 +130,12 @@ async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSp raise BoxSessionNotFoundError(f'session {session_id} not found') async with runtime_session.lock: - existing = runtime_session.managed_process + process_id = spec.process_id + existing = runtime_session.managed_processes.get(process_id) if existing is not None and existing.is_running: - raise BoxManagedProcessConflictError(f'session {session_id} already has a managed process') + raise BoxManagedProcessConflictError( + f'session {session_id} already has a running managed process with process_id={process_id}' + ) backend = await self._get_backend() process = await backend.start_managed_process(runtime_session.info, spec) @@ -140,19 +146,20 @@ async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSp attach_lock=asyncio.Lock(), stderr_chunks=collections.deque(), ) - runtime_session.managed_process = managed_process + runtime_session.managed_processes[process_id] = managed_process runtime_session.info.last_used_at = dt.datetime.now(_UTC) - asyncio.create_task(self._drain_managed_process_stderr(runtime_session.info.session_id, managed_process)) - 
asyncio.create_task(self._watch_managed_process(runtime_session.info.session_id, managed_process)) - return self._managed_process_to_dict(runtime_session.info.session_id, managed_process) + asyncio.create_task(self._drain_managed_process_stderr(runtime_session.info.session_id, process_id, managed_process)) + asyncio.create_task(self._watch_managed_process(runtime_session.info.session_id, process_id, managed_process)) + return self._managed_process_to_dict(runtime_session.info.session_id, process_id, managed_process) - def get_managed_process(self, session_id: str) -> dict: + def get_managed_process(self, session_id: str, process_id: str = 'default') -> dict: runtime_session = self._sessions.get(session_id) if runtime_session is None: raise BoxSessionNotFoundError(f'session {session_id} not found') - if runtime_session.managed_process is None: - raise BoxManagedProcessNotFoundError(f'session {session_id} has no managed process') - return self._managed_process_to_dict(session_id, runtime_session.managed_process) + managed_process = runtime_session.managed_processes.get(process_id) + if managed_process is None: + raise BoxManagedProcessNotFoundError(f'session {session_id} has no managed process with process_id={process_id}') + return self._managed_process_to_dict(session_id, process_id, managed_process) # ── Observability ───────────────────────────────────────────────── @@ -174,8 +181,11 @@ def get_session(self, session_id: str) -> dict: if runtime_session is None: raise BoxSessionNotFoundError(f'session {session_id} not found') result = self._session_to_dict(runtime_session.info) - if runtime_session.managed_process is not None: - result['managed_process'] = self._managed_process_to_dict(session_id, runtime_session.managed_process) + if runtime_session.managed_processes: + result['managed_processes'] = { + pid: self._managed_process_to_dict(session_id, pid, mp) + for pid, mp in runtime_session.managed_processes.items() + } return result async def get_status(self) -> 
dict: @@ -186,7 +196,8 @@ async def get_status(self) -> dict: 'managed_processes': sum( 1 for runtime_session in self._sessions.values() - if runtime_session.managed_process is not None and runtime_session.managed_process.is_running + for mp in runtime_session.managed_processes.values() + if mp.is_running ), 'session_ttl_sec': self.session_ttl_sec, } @@ -230,7 +241,7 @@ async def _get_backend(self) -> BaseSandboxBackend: self._backend = await self._select_backend() if self._backend is None: raise BoxBackendUnavailableError( - 'LangBot Box backend unavailable. Install and start Podman, Docker, or nsjail before using exec.' + 'LangBot Box backend unavailable. Install and start Docker or nsjail before using exec.' ) return self._backend @@ -244,7 +255,7 @@ async def _select_backend(self) -> BaseSandboxBackend | None: except Exception as exc: self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}') - self.logger.warning('LangBot Box backend unavailable: no supported backend (Podman, Docker, nsjail) is ready') + self.logger.warning('LangBot Box backend unavailable: no supported backend (Docker, nsjail) is ready') return None async def _reap_expired_sessions_locked(self): @@ -255,8 +266,9 @@ async def _reap_expired_sessions_locked(self): expired_session_ids = [ session_id for session_id, session in self._sessions.items() - if session.info.last_used_at < deadline - and not (session.managed_process is not None and session.managed_process.is_running) + if not session.info.persistent + and session.info.last_used_at < deadline + and not any(mp.is_running for mp in session.managed_processes.values()) ] for session_id in expired_session_ids: @@ -267,7 +279,8 @@ async def _drop_session_locked(self, session_id: str): if runtime_session is None or self._backend is None: return - await self._terminate_managed_process(runtime_session) + for mp in runtime_session.managed_processes.values(): + await self._terminate_managed_process(mp) try: self.logger.info( @@ 
-287,6 +300,7 @@ def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): 'host_path', 'host_path_mode', 'mount_path', + 'persistent', 'cpus', 'memory_mb', 'pids_limit', @@ -302,7 +316,7 @@ def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): f'Box session {spec.session_id} already exists with {field}={display}' ) - async def _drain_managed_process_stderr(self, session_id: str, managed_process: _ManagedProcess) -> None: + async def _drain_managed_process_stderr(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> None: stream = managed_process.process.stderr if stream is None: return @@ -323,22 +337,21 @@ async def _drain_managed_process_stderr(self, session_id: str, managed_process: ): removed = managed_process.stderr_chunks.popleft() managed_process.stderr_total_len -= len(removed) + 1 - self.logger.info(f'LangBot Box managed process stderr: session_id={session_id} {text}') + self.logger.info(f'LangBot Box managed process stderr: session_id={session_id} process_id={process_id} {text}') except Exception as exc: - self.logger.warning(f'Failed to drain managed process stderr for {session_id}: {exc}') + self.logger.warning(f'Failed to drain managed process stderr for {session_id}/{process_id}: {exc}') - async def _watch_managed_process(self, session_id: str, managed_process: _ManagedProcess) -> None: + async def _watch_managed_process(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> None: return_code = await managed_process.process.wait() managed_process.exit_code = return_code managed_process.exited_at = dt.datetime.now(_UTC) runtime_session = self._sessions.get(session_id) if runtime_session is not None: runtime_session.info.last_used_at = managed_process.exited_at - self.logger.info(f'LangBot Box managed process exited: session_id={session_id} return_code={return_code}') + self.logger.info(f'LangBot Box managed process exited: session_id={session_id} 
process_id={process_id} return_code={return_code}') - async def _terminate_managed_process(self, runtime_session: _RuntimeSession) -> None: - managed_process = runtime_session.managed_process - if managed_process is None or not managed_process.is_running: + async def _terminate_managed_process(self, managed_process: _ManagedProcess) -> None: + if not managed_process.is_running: return process = managed_process.process @@ -366,11 +379,12 @@ async def _terminate_managed_process(self, runtime_session: _RuntimeSession) -> managed_process.exit_code = process.returncode managed_process.exited_at = dt.datetime.now(_UTC) - def _managed_process_to_dict(self, session_id: str, managed_process: _ManagedProcess) -> dict: + def _managed_process_to_dict(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> dict: stderr_preview = '\n'.join(managed_process.stderr_chunks) status = BoxManagedProcessStatus.RUNNING if managed_process.is_running else BoxManagedProcessStatus.EXITED return BoxManagedProcessInfo( session_id=session_id, + process_id=process_id, status=status, command=managed_process.spec.command, args=managed_process.spec.args, diff --git a/src/langbot_plugin/box/security.py b/src/langbot_plugin/box/security.py index 2e8ed72..7b3b98e 100644 --- a/src/langbot_plugin/box/security.py +++ b/src/langbot_plugin/box/security.py @@ -18,8 +18,6 @@ '/var/run', '/run/docker.sock', '/var/run/docker.sock', - '/run/podman', - '/var/run/podman', } ) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index aca5c94..c2bb8f8 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -7,8 +7,9 @@ python -m langbot_plugin.box.server --mode ws --port 5410 All WebSocket endpoints share a single port (default 5410): - /rpc/ws — Action RPC (control channel) - /v1/sessions/{session_id}/managed-process/ws — Managed process stdio relay + /rpc/ws — Action RPC (control channel) + 
/v1/sessions/{session_id}/managed-process/{process_id}/ws — Managed process stdio relay + /v1/sessions/{session_id}/managed-process/ws — Legacy (process_id defaults to 'default') """ from __future__ import annotations @@ -156,7 +157,10 @@ async def start_managed_process(data: dict[str, Any]) -> ActionResponse: @self.action(LangBotToBoxAction.GET_MANAGED_PROCESS) async def get_managed_process(data: dict[str, Any]) -> ActionResponse: - return ActionResponse.success(self._runtime.get_managed_process(data['session_id'])) + return ActionResponse.success(self._runtime.get_managed_process( + data['session_id'], + data.get('process_id', 'default'), + )) @self.action(LangBotToBoxAction.GET_BACKEND_INFO) async def get_backend_info(data: dict[str, Any]) -> ActionResponse: @@ -182,17 +186,18 @@ def _error_response(exc: Exception) -> web.Response: async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse: runtime: BoxRuntime = request.app['runtime'] session_id = request.match_info['session_id'] + process_id = request.match_info.get('process_id', 'default') runtime_session = runtime._sessions.get(session_id) if runtime_session is None: return _error_response(BoxSessionNotFoundError(f'session {session_id} not found')) - managed_process = runtime_session.managed_process + managed_process = runtime_session.managed_processes.get(process_id) if managed_process is None: - return _error_response(BoxManagedProcessNotFoundError(f'session {session_id} has no managed process')) + return _error_response(BoxManagedProcessNotFoundError(f'session {session_id} has no managed process with process_id={process_id}')) if not managed_process.is_running: return _error_response( - BoxManagedProcessConflictError(f'managed process in session {session_id} is not running') + BoxManagedProcessConflictError(f'managed process {process_id} in session {session_id} is not running') ) ws = web.WebSocketResponse(protocols=('mcp',)) @@ -270,6 +275,8 @@ def create_app(runtime: BoxRuntime) -> 
web.Application: app = web.Application() app['runtime'] = runtime app.router.add_get('/rpc/ws', handle_rpc_ws) + app.router.add_get('/v1/sessions/{session_id}/managed-process/{process_id}/ws', handle_managed_process_ws) + # Backward-compatible route (defaults to process_id='default') app.router.add_get('/v1/sessions/{session_id}/managed-process/ws', handle_managed_process_ws) return app From d593734ed49faa51aa5203b6aee6192906ce006d Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Mon, 20 Apr 2026 22:40:55 +0800 Subject: [PATCH 14/34] fix: terminate stale managed process on restart instead of raising conflict error --- src/langbot_plugin/box/runtime.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index 8509289..292740c 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -11,7 +11,6 @@ from .nsjail_backend import NsjailBackend from .errors import ( BoxBackendUnavailableError, - BoxManagedProcessConflictError, BoxManagedProcessNotFoundError, BoxSessionConflictError, BoxSessionNotFoundError, @@ -133,9 +132,15 @@ async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSp process_id = spec.process_id existing = runtime_session.managed_processes.get(process_id) if existing is not None and existing.is_running: - raise BoxManagedProcessConflictError( - f'session {session_id} already has a running managed process with process_id={process_id}' + # Terminate the stale process before starting a new one. + # This happens when LangBot restarts while the Box runtime + # keeps the persistent session alive. 
+ self.logger.info( + f'LangBot Box terminating stale managed process before restart: ' + f'session_id={session_id} process_id={process_id}' ) + await self._terminate_managed_process(existing) + del runtime_session.managed_processes[process_id] backend = await self._get_backend() process = await backend.start_managed_process(runtime_session.info, spec) From 75b547f927fc966450743e7da09e32be2a66542a Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Fri, 24 Apr 2026 09:51:12 +0800 Subject: [PATCH 15/34] feat(box): add E2B sandbox backend support - Add E2BSandboxBackend for E2B cloud and self-hosted CubeSandbox - Add e2b>=2.15 as core dependency - Add INIT action for configuration passing via WebSocket - Runtime creates all backends at init; each backend determines availability via is_available() - Configuration applied to backends by name (config[backend.name] -> backend.configure()) - Support BOX_BACKEND env var for explicit backend override --- pyproject.toml | 1 + src/langbot_plugin/box/actions.py | 1 + src/langbot_plugin/box/client.py | 6 + src/langbot_plugin/box/e2b_backend.py | 319 +++++++++++++++++ src/langbot_plugin/box/runtime.py | 90 ++++- src/langbot_plugin/box/server.py | 5 + tests/box/test_backend_selection.py | 190 ++++++++++ tests/box/test_e2b_backend.py | 480 ++++++++++++++++++++++++++ 8 files changed, 1090 insertions(+), 2 deletions(-) create mode 100644 src/langbot_plugin/box/e2b_backend.py create mode 100644 tests/box/test_backend_selection.py create mode 100644 tests/box/test_e2b_backend.py diff --git a/pyproject.toml b/pyproject.toml index 1f29456..7f0dbb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "aiofiles>=24.1.0", "aiohttp>=3.9.0", "dotenv>=0.9.9", + "e2b>=2.15", "httpx>=0.28.1", "jinja2>=3.1.6", "pip>=25.2", diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py index 954c606..9b6d741 100644 --- a/src/langbot_plugin/box/actions.py +++ 
b/src/langbot_plugin/box/actions.py @@ -8,6 +8,7 @@ class LangBotToBoxAction(ActionType): """Actions sent from LangBot to the Box runtime.""" + INIT = 'box_init' # Initialize with full box config (highest priority) HEALTH = 'box_health' STATUS = 'box_status' EXEC = 'box_exec' diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index 9175c59..9b81e9c 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -55,6 +55,9 @@ async def get_managed_process(self, session_id: str, process_id: str = 'default' @abc.abstractmethod async def get_session(self, session_id: str) -> dict: ... + @abc.abstractmethod + async def init(self, config: dict) -> None: ... + def _translate_action_error(exc: Exception) -> BoxError: """Convert an ActionCallError message back into the appropriate BoxError subclass.""" @@ -178,3 +181,6 @@ def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: scheme = 'ws://' suffix = base return f'{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws' + + async def init(self, config: dict) -> None: + await self._call(LangBotToBoxAction.INIT, config) diff --git a/src/langbot_plugin/box/e2b_backend.py b/src/langbot_plugin/box/e2b_backend.py new file mode 100644 index 0000000..be6f15d --- /dev/null +++ b/src/langbot_plugin/box/e2b_backend.py @@ -0,0 +1,319 @@ +from __future__ import annotations + +import datetime as dt +import json +import logging +import os + +from .backend import BaseSandboxBackend, _MAX_RAW_OUTPUT_BYTES +from .errors import BoxError +from .models import ( + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from .security import validate_sandbox_security + +# E2B sandbox uses /home/user as the default writable directory +# We map /workspace to /home/user/workspace for compatibility +E2B_DEFAULT_WORKDIR = '/home/user' +E2B_WORKSPACE_DIR = '/home/user/workspace' + +# Lazy imports 
for e2b - only imported when actually needed +_e2b_available: bool | None = None +_AsyncSandbox = None +_CommandResult = None + + +def _check_e2b_available(force: bool = False) -> bool: + """Check if e2b package is available (cached result). + + Args: + force: If True, re-check even if cached result exists. + """ + global _e2b_available, _AsyncSandbox, _CommandResult + if _e2b_available is not None and not force: + return _e2b_available + + try: + from e2b import AsyncSandbox, CommandResult + + _AsyncSandbox = AsyncSandbox + _CommandResult = CommandResult + _e2b_available = True + except ImportError: + _e2b_available = False + + return _e2b_available + + +def _reset_e2b_cache() -> None: + """Reset the e2b availability cache, forcing re-check on next call.""" + global _e2b_available, _AsyncSandbox, _CommandResult + _e2b_available = None + _AsyncSandbox = None + _CommandResult = None + + +def _adapt_path_for_e2b(path: str) -> str: + """Adapt paths for E2B sandbox environment. + + E2B sandbox doesn't have /workspace by default, so we map it to + /home/user/workspace which is writable. + """ + if path == '/workspace' or path.startswith('/workspace/'): + return path.replace('/workspace', E2B_WORKSPACE_DIR, 1) + return path + + +class E2BSandboxBackend(BaseSandboxBackend): + """E2B/CubeSandbox sandbox backend. + + Supports both E2B cloud service and self-hosted CubeSandbox. + Configuration sources (priority from high to low): + 1. Environment variables: E2B_API_KEY, E2B_API_URL + 2. Configuration passed via configure() method (from LangBot config.yaml) + """ + + name = 'e2b' + + def __init__(self, logger: logging.Logger): + super().__init__(logger) + self._api_key: str | None = None + self._api_url: str | None = None + self._default_template: str | None = None + self._config_from_langbot: dict = {} + + def configure(self, config: dict) -> None: + """Apply configuration from LangBot config.yaml. + + Environment variables take precedence over config.yaml values. 
+ """ + self._config_from_langbot = config + # Reset cache to force re-check if e2b package was installed later + _reset_e2b_cache() + + async def initialize(self): + """Load configuration from environment variables (priority) or config.yaml.""" + # Environment variables take precedence + self._api_key = os.getenv('E2B_API_KEY') or self._config_from_langbot.get('api_key') + self._api_url = os.getenv('E2B_API_URL') or self._config_from_langbot.get('api_url') + self._default_template = self._config_from_langbot.get('default_template') + + async def is_available(self) -> bool: + """Check if E2B backend is available. + + Returns True if: + 1. e2b package is installed + 2. E2B_API_KEY environment variable is set + """ + if not _check_e2b_available(): + self.logger.info('e2b package not installed') + return False + + if not self._api_key: + self.logger.info('E2B_API_KEY not set') + return False + + return True + + async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: + """Create a new E2B sandbox session. 
+ + Maps BoxSpec fields to AsyncSandbox.create() parameters: + - template: spec.image (E2B template ID) + - envs: spec.env + - timeout: sandbox lifetime timeout (not command timeout) + - metadata: CubeSandbox host-mount configuration + """ + validate_sandbox_security(spec) + + if not _check_e2b_available(): + raise BoxError('e2b package not installed') + + now = dt.datetime.now(dt.timezone.utc) + + # Adapt paths for E2B environment + workdir = _adapt_path_for_e2b(spec.workdir) + mount_path = _adapt_path_for_e2b(spec.mount_path) + + # Build create parameters + create_kwargs = {} + + # Template - use spec.image if provided, otherwise default_template, otherwise E2B default + if spec.image and spec.image != 'rockchin/langbot-sandbox:latest': + create_kwargs['template'] = spec.image + elif self._default_template: + create_kwargs['template'] = self._default_template + + # Environment variables + if spec.env: + create_kwargs['envs'] = spec.env + + # API key and domain (for CubeSandbox self-deployment) + if self._api_key: + create_kwargs['api_key'] = self._api_key + if self._api_url: + # E2B SDK uses 'domain' for self-hosted API URL + create_kwargs['domain'] = self._api_url + + # Build metadata for CubeSandbox host-mount + metadata = {} + if spec.host_path and spec.host_path_mode != BoxHostMountMode.NONE: + metadata['host-mount'] = json.dumps([{ + 'hostPath': spec.host_path, + 'mountPath': mount_path, + 'readOnly': spec.host_path_mode == BoxHostMountMode.READ_ONLY, + }]) + if metadata: + create_kwargs['metadata'] = metadata + + # Network mode - E2B uses allow_internet_access parameter + # Note: E2B SDK doesn't have this directly in create(), but CubeSandbox may support it + # For now, we rely on template configuration for network access + + self.logger.info( + f'LangBot Box backend start_session: backend=e2b ' + f'session_id={spec.session_id} ' + f'template={create_kwargs.get("template", "default")} ' + f'network={spec.network.value} ' + f'host_path={spec.host_path} 
host_path_mode={spec.host_path_mode.value} mount_path={mount_path} ' + f'env_keys={sorted(spec.env.keys())}' + ) + + try: + sandbox = await _AsyncSandbox.create(**create_kwargs) + except Exception as exc: + raise BoxError(f'Failed to create E2B sandbox: {exc}') + + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=sandbox.sandbox_id, + image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + mount_path=mount_path, + persistent=spec.persistent, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, + created_at=now, + last_used_at=now, + ) + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + """Execute a command in the E2B sandbox. + + Reconnects to existing sandbox via AsyncSandbox.connect() and runs command. + """ + if not _check_e2b_available(): + raise BoxError('e2b package not installed') + + start = dt.datetime.now(dt.timezone.utc) + + # Connect kwargs + connect_kwargs = {} + if self._api_key: + connect_kwargs['api_key'] = self._api_key + if self._api_url: + connect_kwargs['domain'] = self._api_url + + # Adapt workdir for E2B environment (use session's mount_path as base) + workdir = _adapt_path_for_e2b(spec.workdir) + + cmd_preview = spec.cmd.strip() + if len(cmd_preview) > 400: + cmd_preview = f'{cmd_preview[:397]}...' 
+ self.logger.info( + f'LangBot Box backend exec: backend=e2b ' + f'session_id={session.session_id} sandbox_id={session.backend_session_id} ' + f'workdir={workdir} timeout_sec={spec.timeout_sec} ' + f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}' + ) + + try: + sandbox = await _AsyncSandbox.connect( + sandbox_id=session.backend_session_id, + **connect_kwargs + ) + except Exception as exc: + raise BoxError(f'Failed to connect to E2B sandbox: {exc}') + + # Run the command + # Note: E2B requires workdir to exist before running command + # We create it as part of the command, not via cwd parameter + run_kwargs = { + 'cmd': f'mkdir -p {workdir} && cd {workdir} && {spec.cmd}', + 'timeout': spec.timeout_sec, + } + if spec.env: + run_kwargs['envs'] = spec.env + + try: + result = await sandbox.commands.run(**run_kwargs) + except Exception as exc: + # Check if it's a timeout + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) + error_msg = str(exc) + if 'timeout' in error_msg.lower() or 'timed out' in error_msg.lower(): + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.TIMED_OUT, + exit_code=None, + stdout='', + stderr=f'Command timed out after {spec.timeout_sec} seconds.', + duration_ms=duration_ms, + ) + raise BoxError(f'E2B command execution failed: {exc}') + + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) + + # Process output - apply truncation if needed + stdout = self._truncate_output(result.stdout or '') + stderr = self._truncate_output(result.stderr or '') + + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=result.exit_code, + stdout=stdout, + stderr=stderr, + duration_ms=duration_ms, + ) + + async def stop_session(self, session: BoxSessionInfo): + """Kill the E2B sandbox.""" + self.logger.info( + f'LangBot Box backend stop_session: 
backend=e2b ' + f'session_id={session.session_id} sandbox_id={session.backend_session_id}' + ) + + if not _check_e2b_available(): + return # Nothing to do if package not available + + try: + await _AsyncSandbox.kill( + sandbox_id=session.backend_session_id, + api_key=self._api_key, + domain=self._api_url, + ) + except Exception as exc: + self.logger.warning(f'Failed to kill E2B sandbox: {exc}') + + def _truncate_output(self, output: str, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str: + """Truncate output if exceeds the limit.""" + if len(output.encode('utf-8', errors='replace')) > limit: + # Truncate to approximately the limit + truncated = output[:limit] + truncated += f'\n... [output clipped at {limit} bytes]' + return truncated + return output \ No newline at end of file diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index 292740c..c25f652 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -4,7 +4,9 @@ import collections import dataclasses import datetime as dt +import json import logging +import os import uuid from .backend import BaseSandboxBackend, DockerBackend @@ -61,14 +63,45 @@ def __init__( session_ttl_sec: int = 300, ): self.logger = logger - self.backends = backends or [DockerBackend(logger), NsjailBackend(logger)] + + # Load configuration from environment variable (passed by LangBot) + self._box_config: dict = {} + config_json = os.getenv('LANGBOT_BOX_CONFIG', '') + if config_json: + try: + self._box_config = json.loads(config_json) + except json.JSONDecodeError: + logger.warning(f'Failed to parse LANGBOT_BOX_CONFIG: {config_json[:100]}') + + # Build backend list + if backends is None: + backends = [ + DockerBackend(logger), + NsjailBackend(logger), + self._create_e2b_backend(logger), + ] + + self.backends = backends self.session_ttl_sec = session_ttl_sec self._backend: BaseSandboxBackend | None = None self._sessions: dict[str, _RuntimeSession] = {} self._lock = asyncio.Lock() 
self.instance_id = uuid.uuid4().hex[:12] + def _create_e2b_backend(self, logger: logging.Logger) -> 'E2BSandboxBackend | None': + """Create E2B backend if package is installed.""" + try: + from .e2b_backend import E2BSandboxBackend + return E2BSandboxBackend(logger) + except ImportError: + logger.debug('e2b package not installed, E2B backend unavailable') + return None + async def initialize(self): + # Apply configuration from env var to all backends + if self._box_config: + self._apply_config_to_backends(self._box_config) + self._backend = await self._select_backend() if self._backend is not None: self._backend.instance_id = self.instance_id @@ -77,6 +110,23 @@ async def initialize(self): except Exception as exc: self.logger.warning(f'LangBot Box orphan container cleanup failed: {exc}') + def init(self, config: dict) -> None: + """Initialize with full box configuration from LangBot. + + Called via RPC (INIT action) when connecting over WebSocket. + """ + self._box_config.update(config) + self._apply_config_to_backends(config) + + def _apply_config_to_backends(self, config: dict) -> None: + """Apply configuration sections to corresponding backends.""" + for backend in self.backends: + if backend is None: + continue + backend_config = config.get(backend.name, {}) + if backend_config and hasattr(backend, 'configure'): + backend.configure(backend_config) + async def execute(self, spec: BoxSpec) -> BoxExecutionResult: if not spec.cmd: raise BoxValidationError('cmd must not be empty') @@ -251,7 +301,43 @@ async def _get_backend(self) -> BaseSandboxBackend: return self._backend async def _select_backend(self) -> BaseSandboxBackend | None: + # Check for explicit backend override via BOX_BACKEND env var + box_backend_env = os.getenv('BOX_BACKEND') + if box_backend_env: + # Find the specified backend + for backend in self.backends: + if backend is None: + continue + if backend.name == box_backend_env: + try: + await backend.initialize() + if await backend.is_available(): + 
self.logger.info(f'LangBot Box using backend (forced): {backend.name}') + return backend + else: + self.logger.error( + f'LangBot Box backend {backend.name} is not available ' + f'(BOX_BACKEND={box_backend_env})' + ) + return None + except Exception as exc: + self.logger.error( + f'LangBot Box backend {backend.name} probe failed: {exc} ' + f'(BOX_BACKEND={box_backend_env})' + ) + return None + # Backend name not found + available_names = [b.name for b in self.backends if b is not None] + self.logger.error( + f'LangBot Box backend "{box_backend_env}" not found ' + f'(available: {available_names})' + ) + return None + + # Auto-detect: select first available backend for backend in self.backends: + if backend is None: + continue try: await backend.initialize() if await backend.is_available(): @@ -260,7 +346,7 @@ async def _select_backend(self) -> BaseSandboxBackend | None: except Exception as exc: self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}') - self.logger.warning('LangBot Box backend unavailable: no supported backend (Docker, nsjail) is ready') + self.logger.warning('LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready') return None async def _reap_expired_sessions_locked(self): diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index c2bb8f8..36971a4 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -167,6 +167,11 @@ async def get_backend_info(data: dict[str, Any]) -> ActionResponse: info = await self._runtime.get_backend_info() return ActionResponse.success(info) + @self.action(LangBotToBoxAction.INIT) + async def init(data: dict[str, Any]) -> ActionResponse: + self._runtime.init(data) + return ActionResponse.success({'initialized': True}) + @self.action(LangBotToBoxAction.SHUTDOWN) async def shutdown(data: dict[str, Any]) -> ActionResponse: await self._runtime.shutdown() diff --git a/tests/box/test_backend_selection.py 
b/tests/box/test_backend_selection.py new file mode 100644 index 0000000..ea58fe5 --- /dev/null +++ b/tests/box/test_backend_selection.py @@ -0,0 +1,190 @@ +"""Unit tests for BoxRuntime backend selection mechanism.""" + +from __future__ import annotations + +import logging +from unittest import mock + +import pytest + +from langbot_plugin.box.backend import BaseSandboxBackend +from langbot_plugin.box.runtime import BoxRuntime + + +@pytest.fixture +def logger(): + return logging.getLogger('test.runtime') + + +class MockBackend(BaseSandboxBackend): + """Mock backend for testing.""" + + def __init__(self, logger: logging.Logger, name: str, available: bool = True): + super().__init__(logger) + self.name = name + self._available = available + + async def is_available(self) -> bool: + return self._available + + async def start_session(self, spec): + pass + + async def exec(self, session, spec): + pass + + async def stop_session(self, session): + pass + + +# ── E2B backend creation ──────────────────────────────────────────────── + +def test_e2b_backend_created_if_package_installed(logger): + """E2B backend is created when package is installed.""" + with mock.patch('os.getenv', return_value=''): + runtime = BoxRuntime(logger) + # E2B backend exists (package installed) + e2b_backend = runtime.backends[2] + assert e2b_backend is not None + assert e2b_backend.name == 'e2b' + + +def test_e2b_backend_none_if_package_not_installed(logger): + """E2B backend is None when package is not installed.""" + with ( + mock.patch('os.getenv', return_value=''), + mock.patch.object(BoxRuntime, '_create_e2b_backend', return_value=None), + ): + runtime = BoxRuntime(logger) + # Third backend is None (package not installed) + assert runtime.backends[2] is None + # Filtered list for selection + active_backends = [b for b in runtime.backends if b is not None] + assert len(active_backends) == 2 + + +def test_e2b_import_failure_returns_none(logger): + """Import failure for e2b package returns None, 
not fatal.""" + with mock.patch('os.getenv', return_value=''): + # _create_e2b_backend handles ImportError internally + runtime = BoxRuntime(logger) + # Should have Docker, nsjail, and E2B (if package installed) or None + active_backends = [b for b in runtime.backends if b is not None] + assert len(active_backends) >= 2 + + +# ── BOX_BACKEND environment variable ─────────────────────────────────── + +@pytest.mark.anyio +async def test_box_backend_forces_specific_backend(logger): + """BOX_BACKEND env var forces selection of named backend.""" + backend_e2b = MockBackend(logger, 'e2b', available=True) + backend_docker = MockBackend(logger, 'docker', available=True) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail]) + + with mock.patch('os.getenv', side_effect=lambda k: 'docker' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert selected.name == 'docker' + assert selected is backend_docker + + +@pytest.mark.anyio +async def test_box_backend_unavailable_returns_none(logger): + """When BOX_BACKEND specifies unavailable backend, returns None.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + + with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert selected is None + + +@pytest.mark.anyio +async def test_box_backend_not_found_returns_none(logger): + """When BOX_BACKEND specifies unknown backend name, returns None.""" + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker]) + + with mock.patch('os.getenv', side_effect=lambda k: 'unknown' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert 
selected is None + + +@pytest.mark.anyio +async def test_box_backend_no_fallback(logger): + """When BOX_BACKEND is set but backend unavailable, does NOT fallback.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + + with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + # Should return None, not fallback to docker + assert selected is None + + +# ── Auto-detect backend selection ─────────────────────────────────────── + +@pytest.mark.anyio +async def test_auto_detect_first_available(logger): + """Without BOX_BACKEND, selects first available backend.""" + backend_e2b = MockBackend(logger, 'e2b', available=False) + backend_docker = MockBackend(logger, 'docker', available=True) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail]) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected.name == 'docker' + + +@pytest.mark.anyio +async def test_auto_detect_none_when_all_unavailable(logger): + """Returns None when all backends are unavailable.""" + backend_docker = MockBackend(logger, 'docker', available=False) + backend_nsjail = MockBackend(logger, 'nsjail', available=False) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_nsjail]) + + with mock.patch('os.getenv', return_value=None): + selected = await runtime._select_backend() + + assert selected is None + + +# ── Custom backends list ──────────────────────────────────────────────── + +def test_custom_backends_list_preserved(logger): + """Providing custom backends list overrides auto-detection.""" + custom_backend = MockBackend(logger, 'custom', available=True) + + runtime = BoxRuntime(logger, 
backends=[custom_backend]) + + assert len(runtime.backends) == 1 + assert runtime.backends[0].name == 'custom' + + +@pytest.mark.anyio +async def test_custom_backends_with_box_backend(logger): + """BOX_BACKEND works with custom backends list.""" + backend_a = MockBackend(logger, 'a', available=True) + backend_b = MockBackend(logger, 'b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_a, backend_b]) + + with mock.patch('os.getenv', side_effect=lambda k: 'b' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert selected.name == 'b' \ No newline at end of file diff --git a/tests/box/test_e2b_backend.py b/tests/box/test_e2b_backend.py new file mode 100644 index 0000000..565674d --- /dev/null +++ b/tests/box/test_e2b_backend.py @@ -0,0 +1,480 @@ +"""Unit tests for E2BSandboxBackend. + +These tests do NOT require e2b package to be installed – they mock the E2B SDK +to verify parameter mapping, session lifecycle, and availability detection. 
+""" + +from __future__ import annotations + +import json +import logging +from unittest import mock + +import pytest + +from langbot_plugin.box.e2b_backend import ( + E2BSandboxBackend, + _adapt_path_for_e2b, + _check_e2b_available, +) +from langbot_plugin.box.models import ( + BoxExecutionStatus, + BoxHostMountMode, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) + + +@pytest.fixture +def logger(): + return logging.getLogger('test.e2b') + + +@pytest.fixture +def backend(logger): + b = E2BSandboxBackend(logger=logger) + b.instance_id = 'test123' + return b + + +@pytest.fixture +def mock_e2b_module(): + """Mock the e2b module for tests.""" + mock_async_sandbox = mock.MagicMock() + mock_async_sandbox.sandbox_id = 'sandbox-test-123' + + # Mock AsyncSandbox.create + mock_async_sandbox.create = mock.AsyncMock(return_value=mock_async_sandbox) + + # Mock AsyncSandbox.connect + mock_async_sandbox.connect = mock.AsyncMock(return_value=mock_async_sandbox) + + # Mock AsyncSandbox.kill + mock_async_sandbox.kill = mock.AsyncMock(return_value=True) + + # Mock commands.run result + mock_command_result = mock.MagicMock() + mock_command_result.stdout = 'output' + mock_command_result.stderr = '' + mock_command_result.exit_code = 0 + + mock_commands = mock.MagicMock() + mock_commands.run = mock.AsyncMock(return_value=mock_command_result) + mock_async_sandbox.commands = mock_commands + + # Mock the module import + with ( + mock.patch('langbot_plugin.box.e2b_backend._e2b_available', None), + mock.patch('langbot_plugin.box.e2b_backend._AsyncSandbox', None), + mock.patch('langbot_plugin.box.e2b_backend._CommandResult', None), + ): + # Simulate successful import + import langbot_plugin.box.e2b_backend as e2b_backend + e2b_backend._e2b_available = True + e2b_backend._AsyncSandbox = mock_async_sandbox + yield mock_async_sandbox + + +# ── Path adaptation ──────────────────────────────────────────────────── + +def test_adapt_path_workspace(): + """_adapt_path_for_e2b maps /workspace to 
/home/user/workspace.""" + assert _adapt_path_for_e2b('/workspace') == '/home/user/workspace' + assert _adapt_path_for_e2b('/workspace/subdir') == '/home/user/workspace/subdir' + + +def test_adapt_path_other_paths_unchanged(): + """_adapt_path_for_e2b doesn't modify paths not starting with /workspace.""" + assert _adapt_path_for_e2b('/home/user') == '/home/user' + assert _adapt_path_for_e2b('/tmp') == '/tmp' + assert _adapt_path_for_e2b('/code') == '/code' + + +# ── is_available ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_is_available_no_package(backend): + """is_available returns False when e2b package is not installed.""" + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=False): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_no_api_key(backend): + """is_available returns False when E2B_API_KEY is not set.""" + backend._api_key = None + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True): + assert await backend.is_available() is False + + +@pytest.mark.anyio +async def test_is_available_with_api_key(backend): + """is_available returns True when both package and API key are available.""" + backend._api_key = 'test-api-key' + with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True): + assert await backend.is_available() is True + + +@pytest.mark.anyio +async def test_configure_from_langbot(backend, mock_e2b_module): + """configure() applies settings from LangBot config.yaml.""" + backend.configure({ + 'api_key': 'config-api-key', + 'api_url': 'http://127.0.0.1:3000', + 'default_template': 'python-3.11', + }) + await backend.initialize() + + # Environment variable takes precedence, so if not set, use config + assert backend._api_key == 'config-api-key' + assert backend._api_url == 'http://127.0.0.1:3000' + assert backend._default_template == 'python-3.11' + + 
+@pytest.mark.anyio +async def test_env_vars_override_config(backend, mock_e2b_module): + """Environment variables take precedence over config.yaml values.""" + with mock.patch.dict('os.environ', {'E2B_API_KEY': 'env-api-key', 'E2B_API_URL': 'http://env-url'}): + backend.configure({ + 'api_key': 'config-api-key', + 'api_url': 'http://config-url', + }) + await backend.initialize() + + # Environment variables should win + assert backend._api_key == 'env-api-key' + assert backend._api_url == 'http://env-url' + + +# ── start_session ───────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_basic(backend, mock_e2b_module): + """start_session creates sandbox with default parameters.""" + backend._api_key = 'test-api-key' + spec = BoxSpec(session_id='sess1', cmd='echo hi') + + info = await backend.start_session(spec) + + assert info.backend_name == 'e2b' + assert info.session_id == 'sess1' + assert info.backend_session_id == 'sandbox-test-123' + # Path should be adapted + assert info.mount_path == '/home/user/workspace' + + # Verify AsyncSandbox.create was called with api_key + mock_e2b_module.create.assert_called_once() + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('api_key') == 'test-api-key' + + +@pytest.mark.anyio +async def test_start_session_with_template(backend, mock_e2b_module): + """start_session passes template parameter when image is specified.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess2', + cmd='python script.py', + image='python-3.11', + ) + + info = await backend.start_session(spec) + + assert info.image == 'python-3.11' + + # Verify template was passed + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('template') == 'python-3.11' + + +@pytest.mark.anyio +async def test_start_session_with_envs(backend, mock_e2b_module): + """start_session passes environment variables.""" + backend._api_key = 'test-api-key' + spec 
= BoxSpec( + session_id='sess3', + cmd='echo $FOO', + env={'FOO': 'bar', 'DEBUG': '1'}, + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('envs') == {'FOO': 'bar', 'DEBUG': '1'} + + +@pytest.mark.anyio +async def test_start_session_with_api_url(backend, mock_e2b_module): + """start_session passes domain for CubeSandbox self-deployment.""" + backend._api_key = 'dummy' + backend._api_url = 'http://127.0.0.1:3000' + spec = BoxSpec(session_id='sess4', cmd='ls') + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert call_kwargs.get('domain') == 'http://127.0.0.1:3000' + + +@pytest.mark.anyio +async def test_start_session_custom_mount_path(backend, mock_e2b_module): + """start_session adapts custom mount_path.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess5', + cmd='ls', + mount_path='/workspace/myproject', + ) + + info = await backend.start_session(spec) + + # Path should be adapted + assert info.mount_path == '/home/user/workspace/myproject' + + +# ── CubeSandbox host-mount metadata ─────────────────────────────────── + +@pytest.mark.anyio +async def test_start_session_host_mount_rw(backend, mock_e2b_module): + """host_path with rw mode generates correct metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess-hp-rw', + cmd='ls', + host_path='/data/project', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/workspace', + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + metadata = call_kwargs.get('metadata', {}) + + assert 'host-mount' in metadata + host_mount = json.loads(metadata['host-mount']) + assert len(host_mount) == 1 + assert host_mount[0]['hostPath'] == '/data/project' + # mountPath should be adapted + assert host_mount[0]['mountPath'] == '/home/user/workspace' + assert host_mount[0]['readOnly'] is 
False + + +@pytest.mark.anyio +async def test_start_session_host_mount_ro(backend, mock_e2b_module): + """host_path with ro mode generates readOnly=True in metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess-hp-ro', + cmd='cat file.txt', + host_path='/data/source', + host_path_mode=BoxHostMountMode.READ_ONLY, + mount_path='/src', # Non-workspace path stays unchanged + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + metadata = call_kwargs.get('metadata', {}) + + host_mount = json.loads(metadata['host-mount']) + assert host_mount[0]['readOnly'] is True + # Non-workspace path stays unchanged + assert host_mount[0]['mountPath'] == '/src' + + +@pytest.mark.anyio +async def test_start_session_no_host_mount_when_none(backend, mock_e2b_module): + """host_path_mode=none skips host-mount metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec( + session_id='sess-hp-none', + cmd='ls', + host_path='/data', + host_path_mode=BoxHostMountMode.NONE, + ) + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert 'host-mount' not in call_kwargs.get('metadata', {}) + + +@pytest.mark.anyio +async def test_start_session_no_host_mount_when_empty(backend, mock_e2b_module): + """Empty host_path skips host-mount metadata.""" + backend._api_key = 'test-api-key' + spec = BoxSpec(session_id='sess-no-hp', cmd='ls') + + info = await backend.start_session(spec) + + call_kwargs = mock_e2b_module.create.call_args.kwargs + assert 'metadata' not in call_kwargs or 'host-mount' not in call_kwargs.get('metadata', {}) + + +# ── exec ────────────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_exec_success(backend, mock_e2b_module): + """exec runs command and returns result.""" + backend._api_key = 'test-api-key' + + session = BoxSessionInfo( + session_id='exec-sess', + backend_name='e2b', + 
backend_session_id='sandbox-123', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='exec-sess', cmd='echo hello', workdir='/workspace', env={'FOO': 'bar'}) + + result = await backend.exec(session, spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert result.stdout == 'output' + + # Verify connect and run were called + mock_e2b_module.connect.assert_called_once() + mock_e2b_module.commands.run.assert_called_once() + + # Verify command includes path adaptation + run_kwargs = mock_e2b_module.commands.run.call_args.kwargs + assert '/home/user/workspace' in run_kwargs['cmd'] + + +@pytest.mark.anyio +async def test_exec_timeout(backend, mock_e2b_module): + """exec handles timeout correctly.""" + backend._api_key = 'test-api-key' + + # Mock timeout error + mock_e2b_module.commands.run = mock.AsyncMock( + side_effect=Exception('Command timed out after 30 seconds') + ) + + session = BoxSessionInfo( + session_id='timeout-sess', + backend_name='e2b', + backend_session_id='sandbox-456', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='timeout-sess', cmd='sleep 100', timeout_sec=30) + + result = await backend.exec(session, spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + assert 'timed out' in result.stderr.lower() + + +@pytest.mark.anyio +async def test_exec_truncates_large_output(backend, mock_e2b_module): + """exec truncates output exceeding the limit.""" + backend._api_key = 'test-api-key' + + # Create large output (over 1MB) + large_output = 'x' * (2 * 1024 * 1024) # 2MB + mock_command_result = mock.MagicMock() + mock_command_result.stdout = large_output + mock_command_result.stderr = '' + mock_command_result.exit_code = 0 + + mock_commands = 
mock.MagicMock() + mock_commands.run = mock.AsyncMock(return_value=mock_command_result) + mock_e2b_module.commands = mock_commands + + session = BoxSessionInfo( + session_id='truncate-sess', + backend_name='e2b', + backend_session_id='sandbox-789', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec(session_id='truncate-sess', cmd='cat large_file') + + result = await backend.exec(session, spec) + + assert 'clipped' in result.stdout + + +# ── stop_session ────────────────────────────────────────────────────── + +@pytest.mark.anyio +async def test_stop_session(backend, mock_e2b_module): + """stop_session kills the sandbox.""" + backend._api_key = 'test-api-key' + + session = BoxSessionInfo( + session_id='stop-sess', + backend_name='e2b', + backend_session_id='sandbox-to-kill', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + + await backend.stop_session(session) + + # Verify AsyncSandbox.kill was called + mock_e2b_module.kill.assert_called_once() + + +@pytest.mark.anyio +async def test_stop_session_handles_error(backend, mock_e2b_module): + """stop_session logs error but doesn't raise on kill failure.""" + backend._api_key = 'test-api-key' + + mock_e2b_module.kill = mock.AsyncMock(side_effect=Exception('Sandbox not found')) + + session = BoxSessionInfo( + session_id='stop-fail', + backend_name='e2b', + backend_session_id='sandbox-missing', + image='base', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + + # Should not raise + await backend.stop_session(session) + + +# ── _check_e2b_available ────────────────────────────────────────────── + +def test_check_e2b_available_caches_result(): + """_check_e2b_available caches the import check result.""" + # Reset the cache + import langbot_plugin.box.e2b_backend as 
e2b_backend + e2b_backend._e2b_available = None + + # First call + with mock.patch.dict('sys.modules', {'e2b': mock.MagicMock()}): + result1 = _check_e2b_available() + + # Second call should use cached result + result2 = _check_e2b_available() + + assert result1 == result2 + + +def test_check_e2b_available_returns_false_on_import_error(): + """_check_e2b_available returns False when import fails.""" + import langbot_plugin.box.e2b_backend as e2b_backend + e2b_backend._e2b_available = None + e2b_backend._AsyncSandbox = None + + with mock.patch('builtins.__import__', side_effect=ImportError('No e2b')): + result = _check_e2b_available() + + assert result is False \ No newline at end of file From eefdea452e3e4813c81593cfac7bb619038a2751 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 1 May 2026 23:21:42 +0800 Subject: [PATCH 16/34] refactor(box): align with LangBot's restructured box config - E2B backend: rename config key 'default_template' -> 'template'; rename internal attribute _default_template -> _template. - Runtime backend selection: honor top-level box.backend config field, with BOX_BACKEND env var still taking precedence. 'local' fans out to docker -> nsjail; specific names ('docker'/'nsjail'/'e2b') match exactly. 
--- src/langbot_plugin/box/e2b_backend.py | 10 ++-- src/langbot_plugin/box/runtime.py | 79 ++++++++++++++------------- tests/box/test_e2b_backend.py | 4 +- 3 files changed, 49 insertions(+), 44 deletions(-) diff --git a/src/langbot_plugin/box/e2b_backend.py b/src/langbot_plugin/box/e2b_backend.py index be6f15d..178b4a3 100644 --- a/src/langbot_plugin/box/e2b_backend.py +++ b/src/langbot_plugin/box/e2b_backend.py @@ -84,7 +84,7 @@ def __init__(self, logger: logging.Logger): super().__init__(logger) self._api_key: str | None = None self._api_url: str | None = None - self._default_template: str | None = None + self._template: str | None = None self._config_from_langbot: dict = {} def configure(self, config: dict) -> None: @@ -101,7 +101,7 @@ async def initialize(self): # Environment variables take precedence self._api_key = os.getenv('E2B_API_KEY') or self._config_from_langbot.get('api_key') self._api_url = os.getenv('E2B_API_URL') or self._config_from_langbot.get('api_url') - self._default_template = self._config_from_langbot.get('default_template') + self._template = self._config_from_langbot.get('template') async def is_available(self) -> bool: """Check if E2B backend is available. 
@@ -143,11 +143,11 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: # Build create parameters create_kwargs = {} - # Template - use spec.image if provided, otherwise default_template, otherwise E2B default + # Template - use spec.image if provided, otherwise configured template, otherwise E2B default if spec.image and spec.image != 'rockchin/langbot-sandbox:latest': create_kwargs['template'] = spec.image - elif self._default_template: - create_kwargs['template'] = self._default_template + elif self._template: + create_kwargs['template'] = self._template # Environment variables if spec.env: diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index c25f652..abdcceb 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -300,52 +300,57 @@ async def _get_backend(self) -> BaseSandboxBackend: ) return self._backend + # Backends grouped under each top-level box.backend choice. + # 'local' picks the first available local container backend (docker → nsjail). 
+ _LOCAL_BACKEND_NAMES = ('docker', 'nsjail') + async def _select_backend(self) -> BaseSandboxBackend | None: - # Check for explicit backend override via BOX_BACKEND env var - box_backend_env = os.getenv('BOX_BACKEND') - if box_backend_env: - # Find the specified backend - for backend in self.backends: - if backend is None: - continue - if backend.name == box_backend_env: - try: - await backend.initialize() - if await backend.is_available(): - self.logger.info(f'LangBot Box using backend (forced): {backend.name}') - return backend - else: - self.logger.error( - f'LangBot Box backend {backend.name} is not available ' - f'(BOX_BACKEND={box_backend_env})' - ) - return None - except Exception as exc: - self.logger.error( - f'LangBot Box backend {backend.name} probe failed: {exc} ' - f'(BOX_BACKEND={box_backend_env})' - ) - return None - # Backend name not found - available_names = [b.name for b in self.backends if b is not None] - self.logger.error( - f'LangBot Box backend "{box_backend_env}" not found ' - f'(available: {available_names})' - ) - return None + # Backend override priority: BOX_BACKEND env var > box.backend config. + # Accepted values: 'local', 'docker', 'nsjail', 'e2b'. 'local' fans out + # to a list; everything else must match a single backend name exactly. 
+ configured = (self._box_config.get('backend') or '').strip() + forced = (os.getenv('BOX_BACKEND') or configured or '').strip() + source_label = 'BOX_BACKEND' if os.getenv('BOX_BACKEND') else 'box.backend' + + candidates: list[BaseSandboxBackend] + if forced == 'local': + candidates = [ + b for b in self.backends if b is not None and b.name in self._LOCAL_BACKEND_NAMES + ] + if not candidates: + self.logger.error( + f'LangBot Box: no local backend registered ' + f'({source_label}={forced})' + ) + return None + elif forced: + candidates = [b for b in self.backends if b is not None and b.name == forced] + if not candidates: + available_names = [b.name for b in self.backends if b is not None] + self.logger.error( + f'LangBot Box backend "{forced}" not found ' + f'({source_label}={forced}, available: {available_names})' + ) + return None + else: + candidates = [b for b in self.backends if b is not None] - # Auto-detect: select first available backend - for backend in self.backends: - if backend is None: - continue + for backend in candidates: try: await backend.initialize() if await backend.is_available(): - self.logger.info(f'LangBot Box using backend: {backend.name}') + label = f'{backend.name} (forced via {source_label}={forced})' if forced else backend.name + self.logger.info(f'LangBot Box using backend: {label}') return backend except Exception as exc: self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}') + if forced: + self.logger.error( + f'LangBot Box backend "{forced}" probed but not available ' + f'({source_label}={forced})' + ) + self.logger.warning('LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready') return None diff --git a/tests/box/test_e2b_backend.py b/tests/box/test_e2b_backend.py index 565674d..d98969a 100644 --- a/tests/box/test_e2b_backend.py +++ b/tests/box/test_e2b_backend.py @@ -122,14 +122,14 @@ async def test_configure_from_langbot(backend, mock_e2b_module): backend.configure({ 
'api_key': 'config-api-key', 'api_url': 'http://127.0.0.1:3000', - 'default_template': 'python-3.11', + 'template': 'python-3.11', }) await backend.initialize() # Environment variable takes precedence, so if not set, use config assert backend._api_key == 'config-api-key' assert backend._api_url == 'http://127.0.0.1:3000' - assert backend._default_template == 'python-3.11' + assert backend._template == 'python-3.11' @pytest.mark.anyio From 4a2cf65aac67ad9220481fc7c451b30d178b6348 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 2 May 2026 10:55:28 +0800 Subject: [PATCH 17/34] chore: bump version to 0.3.10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7f0dbb5..ed55da6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langbot-plugin" -version = "0.3.7" +version = "0.3.10" description = "This package contains the SDK, CLI for building plugins for LangBot, plus the runtime for hosting LangBot plugins" readme = "README.md" authors = [ From fafb7a4f2ba98c0ae271ff82d3a30c486a3bc458 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 8 May 2026 18:03:58 +0800 Subject: [PATCH 18/34] fix: default box control mode on windows --- src/langbot_plugin/box/server.py | 15 ++++++++++++--- src/langbot_plugin/cli/__init__.py | 8 +++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index 36971a4..5b6c9d4 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -1,6 +1,6 @@ """Standalone Box Runtime service exposing BoxRuntime via action RPC. 
-Usage (stdio, launched by LangBot as subprocess): +Usage (auto, stdio on Unix/macOS and ws on Windows): python -m langbot_plugin.box.server Usage (ws, for remote/docker mode): @@ -44,6 +44,12 @@ logger = logging.getLogger('langbot.box.server') +def _resolve_control_mode(mode: str) -> str: + if mode == 'auto': + return 'ws' if sys.platform == 'win32' else 'stdio' + return mode + + def _result_to_dict(result: BoxExecutionResult) -> dict: return result.model_dump(mode='json') @@ -334,12 +340,15 @@ def main(argv: list[str] | None = None) -> None: parser.add_argument('--host', default='0.0.0.0', help='Bind address') parser.add_argument('--port', type=int, default=5410, help='Bind port') parser.add_argument( - '--mode', choices=['stdio', 'ws'], default='stdio', help='Control channel transport (default: stdio)' + '--mode', + choices=['auto', 'stdio', 'ws'], + default='auto', + help='Control channel transport (default: auto; ws on Windows, stdio elsewhere)', ) args = parser.parse_args(argv) configure_process_logging(stream=sys.stderr) - asyncio.run(_run_server(args.host, args.port, args.mode)) + asyncio.run(_run_server(args.host, args.port, _resolve_control_mode(args.mode))) if __name__ == '__main__': diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index ff9174c..274ad5c 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -36,7 +36,7 @@ box: Run the sandbox box runtime - [--host]: Bind address, default is 0.0.0.0 - [--port]: Bind port for ws relay, default is 5410 - - [--mode]: Control channel transport (stdio or ws), default is stdio + - [--mode]: Control channel transport (auto, stdio, or ws), default is auto """ @@ -133,8 +133,10 @@ def main(): "--port", type=int, default=5410, help="Bind port (ws relay)" ) box_parser.add_argument( - "--mode", choices=["stdio", "ws"], default="stdio", - help="Control channel transport (default: stdio)" + "--mode", + choices=["auto", "stdio", "ws"], + 
default="auto", + help="Control channel transport (default: auto; ws on Windows, stdio elsewhere)", ) args = parser.parse_args() From 57916cdd383a1731726ad599f2efd12ee5ba4edf Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 8 May 2026 18:07:55 +0800 Subject: [PATCH 19/34] fix: align box runtime control args --- src/langbot_plugin/box/server.py | 29 ++++++++++++++++------------- src/langbot_plugin/cli/__init__.py | 21 ++++++++++++++++----- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index 5b6c9d4..0ec5939 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -1,10 +1,10 @@ """Standalone Box Runtime service exposing BoxRuntime via action RPC. -Usage (auto, stdio on Unix/macOS and ws on Windows): +Usage (ws, standalone/manual mode): python -m langbot_plugin.box.server -Usage (ws, for remote/docker mode): - python -m langbot_plugin.box.server --mode ws --port 5410 +Usage (stdio, launched by LangBot as subprocess): + python -m langbot_plugin.box.server --stdio-control All WebSocket endpoints share a single port (default 5410): /rpc/ws — Action RPC (control channel) @@ -44,12 +44,6 @@ logger = logging.getLogger('langbot.box.server') -def _resolve_control_mode(mode: str) -> str: - if mode == 'auto': - return 'ws' if sys.platform == 'win32' else 'stdio' - return mode - - def _result_to_dict(result: BoxExecutionResult) -> dict: return result.model_dump(mode='json') @@ -338,17 +332,26 @@ async def new_connection_callback(connection: Connection) -> None: def main(argv: list[str] | None = None) -> None: parser = argparse.ArgumentParser(description='LangBot Box Runtime Service') parser.add_argument('--host', default='0.0.0.0', help='Bind address') - parser.add_argument('--port', type=int, default=5410, help='Bind port') + parser.add_argument('--ws-control-port', type=int, default=5410, help='The port for control connection') + parser.add_argument( + 
'--port', + type=int, + dest='ws_control_port', + help=argparse.SUPPRESS, + ) + parser.add_argument('--stdio-control', action='store_true', help='Use stdio for control connection') parser.add_argument( '--mode', choices=['auto', 'stdio', 'ws'], - default='auto', - help='Control channel transport (default: auto; ws on Windows, stdio elsewhere)', + help=argparse.SUPPRESS, ) args = parser.parse_args(argv) + stdio_control = args.stdio_control or args.mode == 'stdio' + control_mode = 'stdio' if stdio_control else 'ws' + configure_process_logging(stream=sys.stderr) - asyncio.run(_run_server(args.host, args.port, _resolve_control_mode(args.mode))) + asyncio.run(_run_server(args.host, args.ws_control_port, control_mode)) if __name__ == '__main__': diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index 274ad5c..f0cf4e6 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -35,8 +35,8 @@ - [--ws-debug-port]: The port for debug connection box: Run the sandbox box runtime - [--host]: Bind address, default is 0.0.0.0 - - [--port]: Bind port for ws relay, default is 5410 - - [--mode]: Control channel transport (auto, stdio, or ws), default is auto + - [--stdio-control]: Use stdio for control connection + - [--ws-control-port]: The port for control connection, default is 5410 """ @@ -130,13 +130,24 @@ def main(): "--host", default="0.0.0.0", help="Bind address" ) box_parser.add_argument( - "--port", type=int, default=5410, help="Bind port (ws relay)" + "--stdio-control", action="store_true", help="Use stdio for control connection" + ) + box_parser.add_argument( + "--ws-control-port", + type=int, + default=5410, + help="The port for control connection", + ) + box_parser.add_argument( + "--port", + type=int, + dest="ws_control_port", + help=argparse.SUPPRESS, ) box_parser.add_argument( "--mode", choices=["auto", "stdio", "ws"], - default="auto", - help="Control channel transport (default: auto; ws on Windows, 
stdio elsewhere)", + help=argparse.SUPPRESS, ) args = parser.parse_args() From 5029d9c7e7184110f74b160f79ee65c6592e6899 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Tue, 12 May 2026 23:40:10 +0800 Subject: [PATCH 20/34] fix(box): apply init config before backend reuse --- src/langbot_plugin/box/runtime.py | 2 ++ tests/box/test_backend_selection.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index abdcceb..de92468 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -117,6 +117,8 @@ def init(self, config: dict) -> None: """ self._box_config.update(config) self._apply_config_to_backends(config) + if not self._sessions: + self._backend = None def _apply_config_to_backends(self, config: dict) -> None: """Apply configuration sections to corresponding backends.""" diff --git a/tests/box/test_backend_selection.py b/tests/box/test_backend_selection.py index ea58fe5..b5aaced 100644 --- a/tests/box/test_backend_selection.py +++ b/tests/box/test_backend_selection.py @@ -164,6 +164,26 @@ async def test_auto_detect_none_when_all_unavailable(logger): assert selected is None +@pytest.mark.anyio +async def test_init_config_reselects_backend_before_sessions(logger): + """INIT config from LangBot can change the selected backend.""" + backend_docker = MockBackend(logger, 'docker', available=True) + backend_e2b = MockBackend(logger, 'e2b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b]) + + with mock.patch('os.getenv', return_value=None): + await runtime.initialize() + assert runtime._backend is backend_docker + + runtime.init({'backend': 'e2b'}) + assert runtime._backend is None + + selected = await runtime._get_backend() + + assert selected is backend_e2b + + # ── Custom backends list ──────────────────────────────────────────────── def test_custom_backends_list_preserved(logger): @@ 
-187,4 +207,4 @@ async def test_custom_backends_with_box_backend(logger): with mock.patch('os.getenv', side_effect=lambda k: 'b' if k == 'BOX_BACKEND' else None): selected = await runtime._select_backend() - assert selected.name == 'b' \ No newline at end of file + assert selected.name == 'b' From c6882cff009b214c92b4573e9a21aa647c915e5c Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Wed, 13 May 2026 00:20:07 +0800 Subject: [PATCH 21/34] fix(box): recreate vanished backend sessions --- src/langbot_plugin/box/backend.py | 17 ++++++++++ src/langbot_plugin/box/runtime.py | 12 +++++++ tests/box/test_backend_selection.py | 49 +++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py index 99e6f7d..37ffbe3 100644 --- a/src/langbot_plugin/box/backend.py +++ b/src/langbot_plugin/box/backend.py @@ -62,6 +62,9 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu async def stop_session(self, session: BoxSessionInfo): pass + async def is_session_alive(self, session: BoxSessionInfo) -> bool: + return True + async def start_managed_process(self, session: BoxSessionInfo, spec): raise BoxError(f'{self.name} backend does not support managed processes') @@ -224,6 +227,20 @@ async def stop_session(self, session: BoxSessionInfo): check=False, ) + async def is_session_alive(self, session: BoxSessionInfo) -> bool: + result = await self._run_command( + [ + self.command, + 'inspect', + '-f', + '{{.State.Running}}', + session.backend_session_id, + ], + timeout_sec=5, + check=False, + ) + return result.return_code == 0 and result.stdout.strip().lower() == 'true' + async def cleanup_orphaned_containers(self, current_instance_id: str = ''): """Remove langbot.box containers from previous instances. 
diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index de92468..b3a2b8a 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -266,6 +266,18 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: existing = self._sessions.get(spec.session_id) if existing is not None: self._assert_session_compatible(existing.info, spec) + backend = await self._get_backend() + if not await backend.is_session_alive(existing.info): + self.logger.warning( + 'LangBot Box session backend disappeared, recreating: ' + f'session_id={spec.session_id} ' + f'backend_session_id={existing.info.backend_session_id} ' + f'backend={existing.info.backend_name}' + ) + await self._drop_session_locked(spec.session_id) + existing = None + + if existing is not None: existing.info.last_used_at = dt.datetime.now(_UTC) self.logger.info( 'LangBot Box session reused: ' diff --git a/tests/box/test_backend_selection.py b/tests/box/test_backend_selection.py index b5aaced..48204b6 100644 --- a/tests/box/test_backend_selection.py +++ b/tests/box/test_backend_selection.py @@ -3,11 +3,13 @@ from __future__ import annotations import logging +import datetime as dt from unittest import mock import pytest from langbot_plugin.box.backend import BaseSandboxBackend +from langbot_plugin.box.models import BoxSessionInfo, BoxSpec from langbot_plugin.box.runtime import BoxRuntime @@ -23,18 +25,43 @@ def __init__(self, logger: logging.Logger, name: str, available: bool = True): super().__init__(logger) self.name = name self._available = available + self._alive = True + self.started_sessions = 0 + self.stopped_sessions = 0 async def is_available(self) -> bool: return self._available async def start_session(self, spec): - pass + self.started_sessions += 1 + now = dt.datetime.now(dt.timezone.utc) + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=f'{self.name}-{self.started_sessions}', + 
image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, + persistent=spec.persistent, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + workspace_quota_mb=spec.workspace_quota_mb, + created_at=now, + last_used_at=now, + ) async def exec(self, session, spec): pass async def stop_session(self, session): - pass + self.stopped_sessions += 1 + + async def is_session_alive(self, session) -> bool: + return self._alive # ── E2B backend creation ──────────────────────────────────────────────── @@ -184,6 +211,24 @@ async def test_init_config_reselects_backend_before_sessions(logger): assert selected is backend_e2b +@pytest.mark.anyio +async def test_create_session_recreates_disappeared_backend_session(logger): + """A stale in-memory session is dropped if its backend session vanished.""" + backend = MockBackend(logger, 'docker', available=True) + runtime = BoxRuntime(logger, backends=[backend]) + spec = BoxSpec(session_id='mcp-shared', cmd='true', persistent=True, read_only_rootfs=False) + + with mock.patch('os.getenv', return_value=None): + first = await runtime.create_session(spec) + backend._alive = False + second = await runtime.create_session(spec) + + assert first['backend_session_id'] == 'docker-1' + assert second['backend_session_id'] == 'docker-2' + assert backend.started_sessions == 2 + assert backend.stopped_sessions == 1 + + # ── Custom backends list ──────────────────────────────────────────────── def test_custom_backends_list_preserved(logger): From df9c72271a48252d6465ea8e1281248f8e91dae2 Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Wed, 13 May 2026 22:12:08 +0800 Subject: [PATCH 22/34] feat(box): expose BoxRuntimeClient and ActionRPCBoxClient in the package --- src/langbot_plugin/box/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/langbot_plugin/box/__init__.py 
b/src/langbot_plugin/box/__init__.py index c1ea6e1..5bd86a4 100644 --- a/src/langbot_plugin/box/__init__.py +++ b/src/langbot_plugin/box/__init__.py @@ -1 +1,5 @@ """LangBot Box runtime package.""" + +from .client import BoxRuntimeClient, ActionRPCBoxClient + +__all__ = ['BoxRuntimeClient', 'ActionRPCBoxClient'] From 4ab3502056a716299b3fb28ad91a9dbf67ba5598 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 16 May 2026 17:14:45 +0800 Subject: [PATCH 23/34] feat: add box-managed skill storage --- src/langbot_plugin/box/actions.py | 11 + src/langbot_plugin/box/client.py | 125 +++++ src/langbot_plugin/box/runtime.py | 3 + src/langbot_plugin/box/server.py | 100 ++++ src/langbot_plugin/box/skill_store.py | 639 ++++++++++++++++++++++++++ tests/box/test_skill_store.py | 88 ++++ 6 files changed, 966 insertions(+) create mode 100644 src/langbot_plugin/box/skill_store.py create mode 100644 tests/box/test_skill_store.py diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py index 9b6d741..5bed42e 100644 --- a/src/langbot_plugin/box/actions.py +++ b/src/langbot_plugin/box/actions.py @@ -19,4 +19,15 @@ class LangBotToBoxAction(ActionType): START_MANAGED_PROCESS = 'box_start_managed_process' GET_MANAGED_PROCESS = 'box_get_managed_process' GET_BACKEND_INFO = 'box_get_backend_info' + LIST_SKILLS = 'box_list_skills' + GET_SKILL = 'box_get_skill' + CREATE_SKILL = 'box_create_skill' + UPDATE_SKILL = 'box_update_skill' + DELETE_SKILL = 'box_delete_skill' + SCAN_SKILL_DIRECTORY = 'box_scan_skill_directory' + LIST_SKILL_FILES = 'box_list_skill_files' + READ_SKILL_FILE = 'box_read_skill_file' + WRITE_SKILL_FILE = 'box_write_skill_file' + PREVIEW_SKILL_ZIP = 'box_preview_skill_zip' + INSTALL_SKILL_ZIP = 'box_install_skill_zip' SHUTDOWN = 'box_shutdown' diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index 9b81e9c..5615752 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -58,6 +58,52 @@ 
async def get_session(self, session_id: str) -> dict: ... @abc.abstractmethod async def init(self, config: dict) -> None: ... + async def list_skills(self) -> list[dict]: + raise NotImplementedError + + async def get_skill(self, name: str) -> dict | None: + raise NotImplementedError + + async def create_skill(self, skill: dict) -> dict: + raise NotImplementedError + + async def update_skill(self, name: str, skill: dict) -> dict: + raise NotImplementedError + + async def delete_skill(self, name: str) -> None: + raise NotImplementedError + + async def scan_skill_directory(self, path: str) -> dict: + raise NotImplementedError + + async def list_skill_files( + self, + name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + raise NotImplementedError + + async def read_skill_file(self, name: str, path: str) -> dict: + raise NotImplementedError + + async def write_skill_file(self, name: str, path: str, content: str) -> dict: + raise NotImplementedError + + async def preview_skill_zip(self, file_bytes: bytes, filename: str, source_subdir: str = '') -> list[dict]: + raise NotImplementedError + + async def install_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_paths: list[str] | None = None, + source_path: str = '', + source_subdir: str = '', + ) -> list[dict]: + raise NotImplementedError + def _translate_action_error(exc: Exception) -> BoxError: """Convert an ActionCallError message back into the appropriate BoxError subclass.""" @@ -184,3 +230,82 @@ def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: async def init(self, config: dict) -> None: await self._call(LangBotToBoxAction.INIT, config) + + async def list_skills(self) -> list[dict]: + data = await self._call(LangBotToBoxAction.LIST_SKILLS, {}) + return data['skills'] + + async def get_skill(self, name: str) -> dict | None: + data = await self._call(LangBotToBoxAction.GET_SKILL, {'name': name}) + return data.get('skill') + + 
async def create_skill(self, skill: dict) -> dict: + data = await self._call(LangBotToBoxAction.CREATE_SKILL, {'skill': skill}) + return data['skill'] + + async def update_skill(self, name: str, skill: dict) -> dict: + data = await self._call(LangBotToBoxAction.UPDATE_SKILL, {'name': name, 'skill': skill}) + return data['skill'] + + async def delete_skill(self, name: str) -> None: + await self._call(LangBotToBoxAction.DELETE_SKILL, {'name': name}) + + async def scan_skill_directory(self, path: str) -> dict: + return await self._call(LangBotToBoxAction.SCAN_SKILL_DIRECTORY, {'path': path}) + + async def list_skill_files( + self, + name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + return await self._call( + LangBotToBoxAction.LIST_SKILL_FILES, + { + 'name': name, + 'path': path, + 'include_hidden': include_hidden, + 'max_entries': max_entries, + }, + ) + + async def read_skill_file(self, name: str, path: str) -> dict: + return await self._call(LangBotToBoxAction.READ_SKILL_FILE, {'name': name, 'path': path}) + + async def write_skill_file(self, name: str, path: str, content: str) -> dict: + return await self._call( + LangBotToBoxAction.WRITE_SKILL_FILE, + {'name': name, 'path': path, 'content': content}, + ) + + async def preview_skill_zip(self, file_bytes: bytes, filename: str, source_subdir: str = '') -> list[dict]: + file_key = await self.handler.send_file(file_bytes, 'zip') + data = await self._call( + LangBotToBoxAction.PREVIEW_SKILL_ZIP, + {'file_key': file_key, 'filename': filename, 'source_subdir': source_subdir}, + timeout=60.0, + ) + return data['skills'] + + async def install_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_paths: list[str] | None = None, + source_path: str = '', + source_subdir: str = '', + ) -> list[dict]: + file_key = await self.handler.send_file(file_bytes, 'zip') + data = await self._call( + LangBotToBoxAction.INSTALL_SKILL_ZIP, + { + 'file_key': file_key, + 
'filename': filename, + 'source_paths': source_paths or [], + 'source_path': source_path, + 'source_subdir': source_subdir, + }, + timeout=120.0, + ) + return data['skills'] diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index b3a2b8a..b603074 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -27,6 +27,7 @@ BoxSessionInfo, BoxSpec, ) +from .skill_store import BoxSkillStore _UTC = dt.timezone.utc _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT = 4000 @@ -87,6 +88,7 @@ def __init__( self._sessions: dict[str, _RuntimeSession] = {} self._lock = asyncio.Lock() self.instance_id = uuid.uuid4().hex[:12] + self.skill_store = BoxSkillStore(self._box_config) def _create_e2b_backend(self, logger: logging.Logger) -> 'E2BSandboxBackend | None': """Create E2B backend if package is installed.""" @@ -117,6 +119,7 @@ def init(self, config: dict) -> None: """ self._box_config.update(config) self._apply_config_to_backends(config) + self.skill_store.update_config(self._box_config) if not self._sessions: self._backend = None diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index 0ec5939..ebcd89d 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -167,6 +167,106 @@ async def get_backend_info(data: dict[str, Any]) -> ActionResponse: info = await self._runtime.get_backend_info() return ActionResponse.success(info) + @self.action(LangBotToBoxAction.LIST_SKILLS) + async def list_skills(data: dict[str, Any]) -> ActionResponse: + return ActionResponse.success({'skills': self._runtime.skill_store.list_skills()}) + + @self.action(LangBotToBoxAction.GET_SKILL) + async def get_skill(data: dict[str, Any]) -> ActionResponse: + skill = self._runtime.skill_store.get_skill(data['name']) + return ActionResponse.success({'skill': skill}) + + @self.action(LangBotToBoxAction.CREATE_SKILL) + async def create_skill(data: dict[str, Any]) -> ActionResponse: + try: + skill = 
self._runtime.skill_store.create_skill(data['skill']) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success({'skill': skill}) + + @self.action(LangBotToBoxAction.UPDATE_SKILL) + async def update_skill(data: dict[str, Any]) -> ActionResponse: + try: + skill = self._runtime.skill_store.update_skill(data['name'], data['skill']) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success({'skill': skill}) + + @self.action(LangBotToBoxAction.DELETE_SKILL) + async def delete_skill(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.delete_skill(data['name']) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.SCAN_SKILL_DIRECTORY) + async def scan_skill_directory(data: dict[str, Any]) -> ActionResponse: + try: + skill = self._runtime.skill_store.scan_directory(data['path']) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success(skill) + + @self.action(LangBotToBoxAction.LIST_SKILL_FILES) + async def list_skill_files(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.list_skill_files( + data['name'], + data.get('path', '.'), + include_hidden=bool(data.get('include_hidden', False)), + max_entries=int(data.get('max_entries', 200)), + ) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.READ_SKILL_FILE) + async def read_skill_file(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.read_skill_file(data['name'], data['path']) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success(result) + + 
@self.action(LangBotToBoxAction.WRITE_SKILL_FILE) + async def write_skill_file(data: dict[str, Any]) -> ActionResponse: + try: + result = self._runtime.skill_store.write_skill_file(data['name'], data['path'], data.get('content', '')) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success(result) + + @self.action(LangBotToBoxAction.PREVIEW_SKILL_ZIP) + async def preview_skill_zip(data: dict[str, Any]) -> ActionResponse: + try: + file_bytes = await self.read_local_file(data['file_key']) + await self.delete_local_file(data['file_key']) + result = self._runtime.skill_store.preview_zip_upload( + file_bytes=file_bytes, + filename=data.get('filename', 'skill.zip'), + source_subdir=data.get('source_subdir') or '', + ) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success({'skills': result}) + + @self.action(LangBotToBoxAction.INSTALL_SKILL_ZIP) + async def install_skill_zip(data: dict[str, Any]) -> ActionResponse: + try: + file_bytes = await self.read_local_file(data['file_key']) + await self.delete_local_file(data['file_key']) + result = self._runtime.skill_store.install_zip_upload( + file_bytes=file_bytes, + filename=data.get('filename', 'skill.zip'), + source_paths=data.get('source_paths') or [], + source_path=data.get('source_path') or '', + source_subdir=data.get('source_subdir') or '', + ) + except Exception as exc: + return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.success({'skills': result}) + @self.action(LangBotToBoxAction.INIT) async def init(data: dict[str, Any]) -> ActionResponse: self._runtime.init(data) diff --git a/src/langbot_plugin/box/skill_store.py b/src/langbot_plugin/box/skill_store.py new file mode 100644 index 0000000..2f5e27e --- /dev/null +++ b/src/langbot_plugin/box/skill_store.py @@ -0,0 +1,639 @@ +from __future__ import annotations + +import datetime as dt +import io +import os +import 
posixpath +import shutil +import tempfile +import zipfile +from pathlib import Path +from typing import Optional + +import yaml + + +_FRONTMATTER_FIELDS = ( + 'name', + 'display_name', + 'description', +) + +_PUBLIC_SKILL_FIELDS = ( + 'name', + 'display_name', + 'description', + 'instructions', + 'package_root', + 'entry_file', + 'created_at', + 'updated_at', +) + + +def parse_frontmatter(content: str) -> tuple[dict, str]: + if not content.startswith('---'): + return {}, content + + lines = content.splitlines(keepends=True) + if not lines or lines[0].strip() != '---': + return {}, content + + for index in range(1, len(lines)): + if lines[index].strip() == '---': + metadata_text = ''.join(lines[1:index]) + instructions = ''.join(lines[index + 1 :]).lstrip('\n') + metadata = yaml.safe_load(metadata_text) or {} + if not isinstance(metadata, dict): + metadata = {} + return metadata, instructions + + return {}, content + + +def build_skill_md(metadata: dict, instructions: str) -> str: + frontmatter = {} + for key in _FRONTMATTER_FIELDS: + value = metadata.get(key) + if value is None: + continue + if isinstance(value, str) and not value.strip(): + continue + frontmatter[key] = value + + if not frontmatter: + return instructions + + frontmatter_text = yaml.dump(frontmatter, default_flow_style=False, allow_unicode=True, sort_keys=False).strip() + return f'---\n{frontmatter_text}\n---\n\n{instructions}' + + +class BoxSkillStore: + """Skill package storage owned by the Box runtime process.""" + + def __init__(self, config: dict | None = None): + self._config = config or {} + + def update_config(self, config: dict) -> None: + self._config = config or {} + + @property + def root(self) -> str: + local_config = self._config.get('local') or {} + host_root = str(local_config.get('host_root') or './data/box').strip() + skills_root = str(local_config.get('skills_root') or 'skills').strip() + + host_root_path = Path(host_root).expanduser() + if not host_root_path.is_absolute(): + 
host_root_path = Path.cwd() / host_root_path + host_root_path = host_root_path.resolve() + + skills_root_path = Path(skills_root).expanduser() + if not skills_root_path.is_absolute(): + skills_root_path = host_root_path / skills_root_path + return str(skills_root_path.resolve()) + + def list_skills(self) -> list[dict]: + os.makedirs(self.root, exist_ok=True) + skills: list[dict] = [] + for package_root, entry_file in self._discover_skill_directories(self.root, max_depth=6): + try: + skills.append(self._load_skill_package(package_root, entry_file)) + except Exception: + continue + skills.sort(key=lambda item: item.get('updated_at', ''), reverse=True) + return [self._serialize_skill(skill) for skill in skills] + + def get_skill(self, skill_name: str) -> Optional[dict]: + for skill in self.list_skills(): + if skill.get('name') == skill_name: + return skill + return None + + def create_skill(self, data: dict) -> dict: + name = self._validate_skill_name(data.get('name', '')) + if self.get_skill(name): + raise ValueError(f'Skill with name "{name}" already exists') + + package_root = self._normalize_package_root(data.get('package_root', '')) + managed_root = self._managed_skill_path(name) + target_root = managed_root + imported_skill_data: dict | None = None + + if package_root and self._managed_install_root_for_package(package_root): + if not os.path.isdir(package_root): + raise ValueError(f'Directory does not exist: {package_root}') + target_root = package_root + imported_skill_data = self._read_skill_package(target_root) + elif package_root and package_root != managed_root: + if not os.path.isdir(package_root): + raise ValueError(f'Directory does not exist: {package_root}') + if os.path.exists(managed_root): + raise ValueError(f'Skill directory already exists: {managed_root}') + os.makedirs(os.path.dirname(managed_root), exist_ok=True) + shutil.copytree(package_root, managed_root) + imported_skill_data = self._read_skill_package(managed_root) + else: + 
os.makedirs(managed_root, exist_ok=True) + + metadata = { + 'name': name, + 'display_name': self._resolve_create_field(data, 'display_name', imported_skill_data, default=''), + 'description': self._resolve_create_field(data, 'description', imported_skill_data, default=''), + } + instructions = self._resolve_create_field(data, 'instructions', imported_skill_data, default='') + self._write_skill_md(target_root, metadata, instructions) + + created = self.get_skill(name) + if not created: + raise ValueError(f'Failed to create skill "{name}"') + return created + + def update_skill(self, skill_name: str, data: dict) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + + requested_name = str(data.get('name', skill['name']) or skill['name']).strip() + if requested_name != skill['name']: + raise ValueError('Renaming skills is not supported') + + requested_package_root = str(data.get('package_root', '') or '').strip() + existing_package_root = self._normalize_package_root(skill['package_root']) + if requested_package_root and self._normalize_package_root(requested_package_root) != existing_package_root: + raise ValueError('Updating package_root is not supported; recreate the skill to import a different package') + + metadata = { + 'name': skill['name'], + 'display_name': data.get('display_name', skill.get('display_name', '')), + 'description': data.get('description', skill.get('description', '')), + } + instructions = str(data.get('instructions', skill.get('instructions', '')) or '') + self._write_skill_md(skill['package_root'], metadata, instructions) + + updated = self.get_skill(skill_name) + if not updated: + raise ValueError(f'Skill "{skill_name}" not found after update') + return updated + + def delete_skill(self, skill_name: str) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + + package_root = 
self._normalize_package_root(skill['package_root']) + managed_install_root = self._managed_install_root_for_package(package_root) + if not managed_install_root: + raise ValueError('Only managed skills under the Box skills root can be deleted') + + shutil.rmtree(managed_install_root, ignore_errors=True) + return {'deleted': skill_name} + + def scan_directory(self, path: str) -> dict: + if not os.path.isdir(path): + raise ValueError(f'Directory does not exist: {path}') + + discovered = self._discover_skill_directories(path, max_depth=2) + if not discovered: + raise ValueError(f'No SKILL.md found in {path} or its subdirectories (max depth: 2)') + if len(discovered) > 1: + candidates = ', '.join(found_path for found_path, _entry in discovered) + raise ValueError( + f'Multiple skill directories found in {path}. Please choose a more specific path: {candidates}' + ) + + package_root, entry_file = discovered[0] + return self._load_skill_package(package_root, entry_file) + + def list_skill_files( + self, + skill_name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + skill = self._require_skill(skill_name) + target_dir, relative_path = self._resolve_skill_path(skill, path, expect_directory=True) + entries: list[dict] = [] + with os.scandir(target_dir) as iterator: + for entry in sorted(iterator, key=lambda item: item.name): + if not include_hidden and entry.name.startswith('.'): + continue + entry_rel_path = entry.name if relative_path in ('', '.') else os.path.join(relative_path, entry.name) + is_dir = entry.is_dir() + entries.append( + { + 'path': entry_rel_path.replace(os.sep, '/'), + 'name': entry.name, + 'is_dir': is_dir, + 'size': None if is_dir else entry.stat().st_size, + } + ) + if len(entries) >= max_entries: + break + + return { + 'skill': {'name': skill['name']}, + 'base_path': '.' 
if relative_path in ('', '.') else relative_path.replace(os.sep, '/'), + 'entries': entries, + 'truncated': len(entries) >= max_entries, + } + + def read_skill_file(self, skill_name: str, path: str) -> dict: + skill = self._require_skill(skill_name) + target_path, relative_path = self._resolve_skill_path(skill, path, expect_directory=False) + if not os.path.isfile(target_path): + raise ValueError(f'Skill file not found: {relative_path}') + + try: + with open(target_path, 'r', encoding='utf-8') as f: + content = f.read() + except UnicodeDecodeError as exc: + raise ValueError(f'Skill file is not valid UTF-8 text: {relative_path}') from exc + + return { + 'skill': {'name': skill['name']}, + 'path': relative_path.replace(os.sep, '/'), + 'content': content, + } + + def write_skill_file(self, skill_name: str, path: str, content: str) -> dict: + skill = self._require_skill(skill_name) + target_path, relative_path = self._resolve_skill_path(skill, path, expect_directory=False) + os.makedirs(os.path.dirname(target_path), exist_ok=True) + with open(target_path, 'w', encoding='utf-8') as f: + f.write(content) + + return { + 'skill': {'name': skill['name']}, + 'path': relative_path.replace(os.sep, '/'), + 'bytes_written': len(content.encode('utf-8')), + } + + def preview_zip_upload(self, *, file_bytes: bytes, filename: str, source_subdir: str = '') -> list[dict]: + if not file_bytes: + raise ValueError('Uploaded file is empty') + + tmp_dir = tempfile.mkdtemp(prefix='langbot_box_skill_preview_') + try: + skill_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir) + skill_root = self._resolve_source_subdir_root(skill_root, source_subdir) + return self._preview_skill_candidates( + skill_root, + base_target_name=self._uploaded_skill_target_stem(filename), + suffix='upload', + ) + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def install_zip_upload( + self, + *, + file_bytes: bytes, + filename: str, + source_paths: list[str] | None = None, + source_path: str 
= '', + source_subdir: str = '', + ) -> list[dict]: + if not file_bytes: + raise ValueError('Uploaded file is empty') + + tmp_dir = tempfile.mkdtemp(prefix='langbot_box_skill_upload_') + try: + skill_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir) + skill_root = self._resolve_source_subdir_root(skill_root, source_subdir) + previews = self._preview_skill_candidates( + skill_root, + base_target_name=self._uploaded_skill_target_stem(filename), + suffix='upload', + ) + selected_previews = self._select_preview_candidates( + previews, + {'source_paths': source_paths or [], 'source_path': source_path}, + ) + scanned = self._install_preview_candidates(skill_root, selected_previews) + return [self.get_skill(skill['name']) or self._serialize_skill(skill) for skill in scanned] + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def _require_skill(self, skill_name: str) -> dict: + skill = self.get_skill(skill_name) + if not skill: + raise ValueError(f'Skill "{skill_name}" not found') + return skill + + @staticmethod + def _serialize_skill(skill: dict) -> dict: + return {field: skill.get(field) for field in _PUBLIC_SKILL_FIELDS if field in skill} + + def _load_skill_package(self, package_root: str, entry_file: str = 'SKILL.md') -> dict: + package_root = self._normalize_package_root(package_root) + entry_path = os.path.join(package_root, entry_file) + with open(entry_path, 'r', encoding='utf-8') as f: + content = f.read() + + metadata, instructions = parse_frontmatter(content) + dir_name = os.path.basename(os.path.normpath(package_root)) + stat = os.stat(entry_path) + return { + 'name': str(metadata.get('name') or dir_name).strip(), + 'display_name': str(metadata.get('display_name') or metadata.get('name') or dir_name).strip(), + 'description': str(metadata.get('description') or '').strip(), + 'instructions': instructions, + 'package_root': package_root, + 'entry_file': entry_file, + 'created_at': dt.datetime.fromtimestamp(stat.st_ctime, 
tz=dt.timezone.utc).isoformat(), + 'updated_at': dt.datetime.fromtimestamp(stat.st_mtime, tz=dt.timezone.utc).isoformat(), + } + + def _read_skill_package(self, package_root: str) -> dict: + entry = self._find_skill_entry(package_root) + if entry is None: + raise ValueError(f'No SKILL.md found in {package_root}') + + skill = self._load_skill_package(entry[0], entry[1]) + return { + 'entry_file': skill.get('entry_file', 'SKILL.md'), + 'display_name': skill.get('display_name', ''), + 'description': skill.get('description', ''), + 'instructions': skill.get('instructions', ''), + } + + def _write_skill_md(self, package_root: str, metadata: dict, instructions: str) -> None: + package_root = self._normalize_package_root(package_root) + os.makedirs(package_root, exist_ok=True) + content = build_skill_md(metadata, instructions) + with open(os.path.join(package_root, 'SKILL.md'), 'w', encoding='utf-8') as f: + f.write(content) + + def _managed_skill_path(self, skill_name: str) -> str: + return self._normalize_package_root(os.path.join(self.root, skill_name)) + + def _managed_install_root_for_package(self, package_root: str) -> str: + managed_root = self._normalize_package_root(self.root) + package_root = self._normalize_package_root(package_root) + if not package_root or package_root == managed_root: + return '' + + prefix = f'{managed_root}{os.sep}' + if not package_root.startswith(prefix): + return '' + + relative = os.path.relpath(package_root, managed_root) + top_level = relative.split(os.sep, 1)[0] + if top_level in ('', '.', '..'): + return '' + return os.path.join(managed_root, top_level) + + def _build_preview_target_dir(self, base_target_name: str, source_path: str, suffix: str) -> str: + relative = str(source_path or '').strip().replace('\\', '/').strip('/') + leaf_name = relative.split('/')[-1] if relative else '' + target_name = base_target_name + if leaf_name and leaf_name != base_target_name: + target_name = f'{base_target_name}-{leaf_name}' + if suffix: + 
target_name = f'{target_name}-{suffix}' + return os.path.join(self.root, target_name) + + def _preview_skill_candidates(self, root_path: str, *, base_target_name: str, suffix: str) -> list[dict]: + discovered = self._discover_skill_directories(root_path, max_depth=2) + if not discovered: + raise ValueError(f'No SKILL.md found in {root_path} or its subdirectories (max depth: 2)') + + previews: list[dict] = [] + for package_root, entry_file in discovered: + skill = self._load_skill_package(package_root, entry_file) + relative_path = os.path.relpath(package_root, root_path) + if relative_path in ('', '.'): + relative_path = '' + skill['source_path'] = relative_path.replace(os.sep, '/') + skill['package_root'] = self._build_preview_target_dir(base_target_name, relative_path, suffix) + previews.append(skill) + + previews.sort(key=lambda item: item['source_path']) + return [self._serialize_skill_with_source(preview) for preview in previews] + + @staticmethod + def _serialize_skill_with_source(skill: dict) -> dict: + data = BoxSkillStore._serialize_skill(skill) + if 'source_path' in skill: + data['source_path'] = skill['source_path'] + return data + + def _select_preview_candidates(self, previews: list[dict], data: dict) -> list[dict]: + normalized_paths: list[str] = [] + raw_source_paths = data.get('source_paths', []) + if isinstance(raw_source_paths, list): + for source_path in raw_source_paths: + normalized = str(source_path or '').strip().replace('\\', '/').strip('/') + if normalized not in normalized_paths: + normalized_paths.append(normalized) + + legacy_source_path = str(data.get('source_path', '') or '').strip().replace('\\', '/').strip('/') + if legacy_source_path and legacy_source_path not in normalized_paths: + normalized_paths.append(legacy_source_path) + + if len(previews) == 1 and not normalized_paths: + return previews + + if not normalized_paths: + candidates = ', '.join(item['source_path'] or '.' 
for item in previews) + raise ValueError(f'Multiple skills found. Please choose one or more source_paths: {candidates}') + + selected: list[dict] = [] + available = {preview['source_path']: preview for preview in previews} + for normalized_path in normalized_paths: + preview = available.get(normalized_path) + if preview is None: + candidates = ', '.join(item['source_path'] or '.' for item in previews) + raise ValueError(f'Invalid source_path "{normalized_path}". Available: {candidates}') + selected.append(preview) + + return selected + + def _install_preview_candidates(self, root_path: str, selected_previews: list[dict]) -> list[dict]: + target_dirs: list[str] = [] + for preview in selected_previews: + target_dir = self._normalize_package_root(preview['package_root']) + if target_dir in target_dirs: + raise ValueError(f'Duplicate target directory selected: {target_dir}') + if os.path.exists(target_dir): + raise ValueError(f'Skill directory already exists: {target_dir}') + target_dirs.append(target_dir) + + installed_scans: list[dict] = [] + created_dirs: list[str] = [] + try: + for preview in selected_previews: + target_dir = self._normalize_package_root(preview['package_root']) + source_root = self._preview_source_root(root_path, preview['source_path']) + os.makedirs(os.path.dirname(target_dir), exist_ok=True) + shutil.copytree(source_root, target_dir) + created_dirs.append(target_dir) + installed_scans.append(self.scan_directory(target_dir)) + except Exception: + for target_dir in created_dirs: + shutil.rmtree(target_dir, ignore_errors=True) + raise + + return installed_scans + + def _extract_uploaded_skill_to_temp(self, file_bytes: bytes, tmp_dir: str) -> str: + extract_dir = os.path.join(tmp_dir, 'extracted') + try: + with zipfile.ZipFile(io.BytesIO(file_bytes), 'r') as zf: + self._safe_extract_zip(zf, extract_dir) + except zipfile.BadZipFile as exc: + raise ValueError('Uploaded file must be a valid .zip archive') from exc + + entries = os.listdir(extract_dir) 
+ if len(entries) == 1 and os.path.isdir(os.path.join(extract_dir, entries[0])): + return os.path.join(extract_dir, entries[0]) + return extract_dir + + @staticmethod + def _uploaded_skill_target_stem(filename: str) -> str: + stem = os.path.splitext(os.path.basename(str(filename or '').strip()))[0] + safe_stem = ''.join(ch if ch.isalnum() or ch in ('-', '_') else '-' for ch in stem).strip('-_') + return safe_stem or 'uploaded-skill' + + @staticmethod + def _preview_source_root(root_path: str, source_path: str) -> str: + normalized = str(source_path or '').strip().replace('\\', '/').strip('/') + if not normalized: + return root_path + return os.path.join(root_path, normalized) + + @staticmethod + def _resolve_source_subdir_root(root_path: str, source_subdir: str) -> str: + normalized = str(source_subdir or '').strip().replace('\\', '/').strip('/') + if not normalized: + return root_path + + normalized_path = os.path.normpath(normalized) + if normalized_path.startswith('..') or normalized_path == '..' or os.path.isabs(normalized_path): + raise ValueError('source_subdir must stay within the uploaded archive') + + target_root = os.path.realpath(os.path.join(root_path, normalized_path)) + archive_root = os.path.realpath(root_path) + if target_root != archive_root and not target_root.startswith(f'{archive_root}{os.sep}'): + raise ValueError('source_subdir must stay within the uploaded archive') + if not os.path.isdir(target_root): + raise ValueError(f'source_subdir does not exist in the uploaded archive: {normalized}') + return target_root + + @staticmethod + def _safe_extract_zip(archive: zipfile.ZipFile, target_dir: str) -> None: + target_root = os.path.realpath(target_dir) + os.makedirs(target_root, exist_ok=True) + + for member in archive.infolist(): + member_name = member.filename + if not member_name or member_name.endswith('/'): + continue + + normalized = posixpath.normpath(member_name) + if normalized.startswith('../') or normalized == '..' 
or os.path.isabs(normalized): + raise ValueError(f'Archive contains an unsafe path: {member_name}') + + destination = os.path.realpath(os.path.join(target_root, normalized)) + if destination != target_root and not destination.startswith(f'{target_root}{os.sep}'): + raise ValueError(f'Archive contains an unsafe path: {member_name}') + + archive.extractall(target_root) + + def _resolve_skill_path(self, skill: dict, path: str, *, expect_directory: bool) -> tuple[str, str]: + package_root = self._normalize_package_root(skill.get('package_root', '')) + if not package_root: + raise ValueError(f'Skill "{skill.get("name", "")}" has no package_root') + + relative_path = str(path or '.').strip() or '.' + if os.path.isabs(relative_path): + raise ValueError('path must be relative to the skill package root') + + normalized_relative = os.path.normpath(relative_path) + if normalized_relative.startswith('..') or normalized_relative == '..': + raise ValueError('path must stay within the skill package root') + + target_path = os.path.realpath(os.path.join(package_root, normalized_relative)) + if target_path != package_root and not target_path.startswith(f'{package_root}{os.sep}'): + raise ValueError('path must stay within the skill package root') + + if expect_directory: + if not os.path.isdir(target_path): + raise ValueError(f'Skill directory not found: {relative_path}') + else: + parent_dir = os.path.dirname(target_path) or package_root + if parent_dir != package_root and not parent_dir.startswith(f'{package_root}{os.sep}'): + raise ValueError('path must stay within the skill package root') + + return target_path, normalized_relative + + @staticmethod + def _find_skill_entry(path: str) -> Optional[tuple[str, str]]: + for candidate in ('SKILL.md', 'skill.md'): + if os.path.isfile(os.path.join(path, candidate)): + return path, candidate + return None + + def _discover_skill_directories(self, root_path: str, max_depth: int = 2) -> list[tuple[str, str]]: + discovered: list[tuple[str, 
str]] = [] + queue: list[tuple[str, int]] = [(root_path, 0)] + seen: set[str] = set() + + while queue: + current_path, depth = queue.pop(0) + normalized_path = os.path.abspath(current_path) + if normalized_path in seen: + continue + seen.add(normalized_path) + + found = self._find_skill_entry(normalized_path) + if found: + discovered.append(found) + continue + + if depth >= max_depth: + continue + + try: + entries = sorted(os.scandir(normalized_path), key=lambda entry: entry.name) + except OSError: + continue + + for entry in entries: + if entry.is_dir(): + queue.append((entry.path, depth + 1)) + + return discovered + + @staticmethod + def _validate_skill_name(name: str) -> str: + name = str(name or '').strip() + if not name: + raise ValueError('Skill name is required') + if not name.replace('-', '').replace('_', '').isalnum(): + raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores') + if len(name) > 64: + raise ValueError('Skill name cannot exceed 64 characters') + return name + + @staticmethod + def _normalize_package_root(package_root: str) -> str: + package_root = str(package_root).strip() + if not package_root: + return '' + return os.path.realpath(os.path.abspath(package_root)) + + @staticmethod + def _resolve_create_field(data: dict, field: str, imported_skill_data: dict | None, *, default: str) -> str: + raw_value = data.get(field) if field in data else None + if raw_value is None: + if imported_skill_data is not None: + return str(imported_skill_data.get(field, default) or default) + return default + + value = str(raw_value or '') + if imported_skill_data is not None and not value.strip(): + return str(imported_skill_data.get(field, default) or default) + return value diff --git a/tests/box/test_skill_store.py b/tests/box/test_skill_store.py new file mode 100644 index 0000000..dcba96f --- /dev/null +++ b/tests/box/test_skill_store.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import io +import zipfile + +from 
langbot_plugin.box.skill_store import BoxSkillStore + + +def _skill_zip(name: str = 'demo') -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w') as zf: + zf.writestr( + f'{name}/SKILL.md', + '---\n' + f'name: {name}\n' + f'display_name: {name.title()}\n' + 'description: Demo skill\n' + '---\n\n' + 'Use this skill for tests.\n', + ) + zf.writestr(f'{name}/notes.txt', 'hello') + return buffer.getvalue() + + +def _nested_skill_zip() -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w') as zf: + zf.writestr( + 'repo/packages/alpha/SKILL.md', + '---\nname: alpha\ndisplay_name: Alpha\n---\n\nAlpha instructions.\n', + ) + zf.writestr( + 'repo/packages/beta/SKILL.md', + '---\nname: beta\ndisplay_name: Beta\n---\n\nBeta instructions.\n', + ) + return buffer.getvalue() + + +def test_skill_store_installs_zip_under_configured_relative_skills_root(tmp_path): + store = BoxSkillStore({ + 'local': { + 'host_root': str(tmp_path), + 'skills_root': 'custom-skills', + } + }) + + preview = store.preview_zip_upload(file_bytes=_skill_zip(), filename='demo.zip') + assert preview[0]['package_root'] == str(tmp_path / 'custom-skills' / 'demo-upload') + + installed = store.install_zip_upload(file_bytes=_skill_zip(), filename='demo.zip') + assert installed[0]['name'] == 'demo' + assert installed[0]['package_root'] == str(tmp_path / 'custom-skills' / 'demo-upload') + + files = store.list_skill_files('demo') + assert {entry['name'] for entry in files['entries']} == {'SKILL.md', 'notes.txt'} + + content = store.read_skill_file('demo', 'notes.txt') + assert content['content'] == 'hello' + + store.write_skill_file('demo', 'notes.txt', 'updated') + assert store.read_skill_file('demo', 'notes.txt')['content'] == 'updated' + + +def test_skill_store_supports_source_subdir_before_selecting_candidates(tmp_path): + store = BoxSkillStore({ + 'local': { + 'host_root': str(tmp_path), + 'skills_root': 'skills', + } + }) + + preview = store.preview_zip_upload( + 
file_bytes=_nested_skill_zip(), + filename='repo.zip', + source_subdir='packages', + ) + + assert [skill['source_path'] for skill in preview] == ['alpha', 'beta'] + + installed = store.install_zip_upload( + file_bytes=_nested_skill_zip(), + filename='repo.zip', + source_subdir='packages', + source_paths=['beta'], + ) + + assert [skill['name'] for skill in installed] == ['beta'] + assert installed[0]['package_root'] == str(tmp_path / 'skills' / 'repo-beta-upload') From 1aa043f8d4d55a6f07ae998c20515e9d70530be8 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 17 May 2026 23:08:57 +0800 Subject: [PATCH 24/34] feat: support skill zip target suffix --- src/langbot_plugin/box/client.py | 26 +++++++++++++++++++++++--- src/langbot_plugin/box/server.py | 2 ++ src/langbot_plugin/box/skill_store.py | 14 +++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index 5615752..c489008 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -91,7 +91,13 @@ async def read_skill_file(self, name: str, path: str) -> dict: async def write_skill_file(self, name: str, path: str, content: str) -> dict: raise NotImplementedError - async def preview_skill_zip(self, file_bytes: bytes, filename: str, source_subdir: str = '') -> list[dict]: + async def preview_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: raise NotImplementedError async def install_skill_zip( @@ -101,6 +107,7 @@ async def install_skill_zip( source_paths: list[str] | None = None, source_path: str = '', source_subdir: str = '', + target_suffix: str = 'upload', ) -> list[dict]: raise NotImplementedError @@ -279,11 +286,22 @@ async def write_skill_file(self, name: str, path: str, content: str) -> dict: {'name': name, 'path': path, 'content': content}, ) - async def preview_skill_zip(self, file_bytes: bytes, filename: 
str, source_subdir: str = '') -> list[dict]: + async def preview_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: file_key = await self.handler.send_file(file_bytes, 'zip') data = await self._call( LangBotToBoxAction.PREVIEW_SKILL_ZIP, - {'file_key': file_key, 'filename': filename, 'source_subdir': source_subdir}, + { + 'file_key': file_key, + 'filename': filename, + 'source_subdir': source_subdir, + 'target_suffix': target_suffix, + }, timeout=60.0, ) return data['skills'] @@ -295,6 +313,7 @@ async def install_skill_zip( source_paths: list[str] | None = None, source_path: str = '', source_subdir: str = '', + target_suffix: str = 'upload', ) -> list[dict]: file_key = await self.handler.send_file(file_bytes, 'zip') data = await self._call( @@ -305,6 +324,7 @@ async def install_skill_zip( 'source_paths': source_paths or [], 'source_path': source_path, 'source_subdir': source_subdir, + 'target_suffix': target_suffix, }, timeout=120.0, ) diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index ebcd89d..f66a858 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -246,6 +246,7 @@ async def preview_skill_zip(data: dict[str, Any]) -> ActionResponse: file_bytes=file_bytes, filename=data.get('filename', 'skill.zip'), source_subdir=data.get('source_subdir') or '', + target_suffix=data.get('target_suffix', 'upload'), ) except Exception as exc: return ActionResponse.error(f'BoxValidationError: {exc}') @@ -262,6 +263,7 @@ async def install_skill_zip(data: dict[str, Any]) -> ActionResponse: source_paths=data.get('source_paths') or [], source_path=data.get('source_path') or '', source_subdir=data.get('source_subdir') or '', + target_suffix=data.get('target_suffix', 'upload'), ) except Exception as exc: return ActionResponse.error(f'BoxValidationError: {exc}') diff --git a/src/langbot_plugin/box/skill_store.py 
b/src/langbot_plugin/box/skill_store.py index 2f5e27e..ad8eb15 100644 --- a/src/langbot_plugin/box/skill_store.py +++ b/src/langbot_plugin/box/skill_store.py @@ -270,7 +270,14 @@ def write_skill_file(self, skill_name: str, path: str, content: str) -> dict: 'bytes_written': len(content.encode('utf-8')), } - def preview_zip_upload(self, *, file_bytes: bytes, filename: str, source_subdir: str = '') -> list[dict]: + def preview_zip_upload( + self, + *, + file_bytes: bytes, + filename: str, + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: if not file_bytes: raise ValueError('Uploaded file is empty') @@ -281,7 +288,7 @@ def preview_zip_upload(self, *, file_bytes: bytes, filename: str, source_subdir: return self._preview_skill_candidates( skill_root, base_target_name=self._uploaded_skill_target_stem(filename), - suffix='upload', + suffix=target_suffix, ) finally: shutil.rmtree(tmp_dir, ignore_errors=True) @@ -294,6 +301,7 @@ def install_zip_upload( source_paths: list[str] | None = None, source_path: str = '', source_subdir: str = '', + target_suffix: str = 'upload', ) -> list[dict]: if not file_bytes: raise ValueError('Uploaded file is empty') @@ -305,7 +313,7 @@ def install_zip_upload( previews = self._preview_skill_candidates( skill_root, base_target_name=self._uploaded_skill_target_stem(filename), - suffix='upload', + suffix=target_suffix, ) selected_previews = self._select_preview_candidates( previews, From bd5c2a658def2b9c037a43e8c07e2cae80c1ebd4 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Mon, 18 May 2026 17:26:35 +0800 Subject: [PATCH 25/34] chore: ignore box runtime data --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7c011af..5498a13 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,7 @@ cython_debug/ .pypirc /data/plugins/ +/data/box/ /debug/ uv.lock src/.DS_Store From e5617c778522253f1956c299726a5df4d6b1f71f Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> 
Date: Mon, 18 May 2026 17:45:19 +0800 Subject: [PATCH 26/34] fix(box): reselect backend for status checks --- src/langbot_plugin/box/runtime.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index b603074..91e6869 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -224,6 +224,8 @@ def get_managed_process(self, session_id: str, process_id: str = 'default') -> d # ── Observability ───────────────────────────────────────────────── async def get_backend_info(self) -> dict: + if self._backend is None: + self._backend = await self._select_backend() backend = self._backend if backend is None: return {'name': None, 'available': False} From 0fea9b181786382203d8c2dd1ecd8ba162697e4f Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 18 May 2026 22:29:07 +0800 Subject: [PATCH 27/34] fix(box): sync E2B extra mounts --- src/langbot_plugin/box/e2b_backend.py | 122 ++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 6 deletions(-) diff --git a/src/langbot_plugin/box/e2b_backend.py b/src/langbot_plugin/box/e2b_backend.py index 178b4a3..46ee031 100644 --- a/src/langbot_plugin/box/e2b_backend.py +++ b/src/langbot_plugin/box/e2b_backend.py @@ -4,6 +4,8 @@ import json import logging import os +import posixpath +import shlex from .backend import BaseSandboxBackend, _MAX_RAW_OUTPUT_BYTES from .errors import BoxError @@ -69,6 +71,11 @@ def _adapt_path_for_e2b(path: str) -> str: return path +def _rewrite_command_paths_for_e2b(command: str) -> str: + """Rewrite LangBot's logical /workspace paths for E2B's real writable path.""" + return command.replace('/workspace', E2B_WORKSPACE_DIR) + + class E2BSandboxBackend(BaseSandboxBackend): """E2B/CubeSandbox sandbox backend. 
@@ -197,7 +204,11 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, - mount_path=mount_path, + # Keep the logical mount path in session metadata. The runtime + # compares future BoxSpec objects against this value when reusing + # sessions; storing the E2B-internal path here makes every later + # /workspace request look incompatible. + mount_path=spec.mount_path, persistent=spec.persistent, cpus=spec.cpus, memory_mb=spec.memory_mb, @@ -225,8 +236,9 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu if self._api_url: connect_kwargs['domain'] = self._api_url - # Adapt workdir for E2B environment (use session's mount_path as base) + # Adapt workdir and logical /workspace command paths for E2B. workdir = _adapt_path_for_e2b(spec.workdir) + command = _rewrite_command_paths_for_e2b(spec.cmd) cmd_preview = spec.cmd.strip() if len(cmd_preview) > 400: @@ -246,11 +258,13 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu except Exception as exc: raise BoxError(f'Failed to connect to E2B sandbox: {exc}') + await self._sync_mounts_to_e2b(sandbox, spec) + # Run the command - # Note: E2B requires workdir to exist before running command - # We create it as part of the command, not via cwd parameter + # Note: E2B requires cwd to exist before running command. We create it + # as part of the command and then run from that directory. 
run_kwargs = { - 'cmd': f'mkdir -p {workdir} && cd {workdir} && {spec.cmd}', + 'cmd': f'mkdir -p {shlex.quote(workdir)} && cd {shlex.quote(workdir)} && {command}', 'timeout': spec.timeout_sec, } if spec.env: @@ -274,6 +288,8 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu ) raise BoxError(f'E2B command execution failed: {exc}') + await self._sync_mounts_from_e2b(sandbox, spec) + duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000) # Process output - apply truncation if needed @@ -290,6 +306,100 @@ async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResu duration_ms=duration_ms, ) + async def _sync_mounts_to_e2b(self, sandbox, spec: BoxSpec) -> None: + """Best-effort upload of all logical mounts into public E2B.""" + if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + await self._sync_host_tree_to_e2b( + sandbox, + host_root=spec.host_path, + remote_root=_adapt_path_for_e2b(spec.mount_path), + ) + + for mount in spec.extra_mounts: + if mount.mode == BoxHostMountMode.NONE: + continue + await self._sync_host_tree_to_e2b( + sandbox, + host_root=mount.host_path, + remote_root=_adapt_path_for_e2b(mount.mount_path), + ) + + async def _sync_mounts_from_e2b(self, sandbox, spec: BoxSpec) -> None: + """Best-effort download of writable E2B mounts into host paths.""" + if spec.host_path is not None and spec.host_path_mode == BoxHostMountMode.READ_WRITE: + await self._sync_e2b_tree_to_host( + sandbox, + remote_root=_adapt_path_for_e2b(spec.mount_path), + host_root=spec.host_path, + ) + + for mount in spec.extra_mounts: + if mount.mode != BoxHostMountMode.READ_WRITE: + continue + await self._sync_e2b_tree_to_host( + sandbox, + remote_root=_adapt_path_for_e2b(mount.mount_path), + host_root=mount.host_path, + ) + + async def _sync_host_tree_to_e2b(self, sandbox, *, host_root: str, remote_root: str) -> None: + """Best-effort sync for public E2B, which has no 
local bind mounts.""" + if not os.path.isdir(host_root): + return + + for root, dirs, files in os.walk(host_root): + dirs[:] = [d for d in dirs if d not in {'.git', '__pycache__', '.venv', 'node_modules'}] + rel_dir = os.path.relpath(root, host_root) + remote_dir = remote_root if rel_dir == '.' else posixpath.join(remote_root, rel_dir.replace(os.sep, '/')) + try: + await sandbox.commands.run(f'mkdir -p {shlex.quote(remote_dir)}', timeout=10) + except Exception as exc: + self.logger.debug(f'Failed to create E2B sync dir {remote_dir}: {exc}') + continue + + for filename in files: + host_file = os.path.join(root, filename) + try: + if os.path.getsize(host_file) > _MAX_RAW_OUTPUT_BYTES: + continue + with open(host_file, 'rb') as f: + data = f.read() + remote_file = posixpath.join(remote_dir, filename) + await sandbox.files.write(remote_file, data) + except Exception as exc: + self.logger.debug(f'Failed to sync host file to E2B {host_file}: {exc}') + + async def _sync_e2b_tree_to_host(self, sandbox, *, remote_root: str, host_root: str) -> None: + """Best-effort download of an E2B mount into the matching host path.""" + os.makedirs(host_root, exist_ok=True) + try: + entries = await sandbox.files.list(remote_root, depth=16) + except Exception as exc: + self.logger.debug(f'Failed to list E2B mount for sync {remote_root}: {exc}') + return + + for entry in entries: + remote_path = str(getattr(entry, 'path', '') or '') + if not remote_path or remote_path == remote_root or not remote_path.startswith(remote_root + '/'): + continue + rel_path = remote_path[len(remote_root) :].lstrip('/') + real_host_root = os.path.realpath(host_root) + host_path = os.path.realpath(os.path.join(real_host_root, *rel_path.split('/'))) + if not (host_path == real_host_root or host_path.startswith(real_host_root + os.sep)): + continue + + entry_type = getattr(getattr(entry, 'type', None), 'value', '') + try: + if entry_type == 'dir': + os.makedirs(host_path, exist_ok=True) + elif entry_type == 
'file': + os.makedirs(os.path.dirname(host_path), exist_ok=True) + data = await sandbox.files.read(remote_path, format='bytes') + with open(host_path, 'wb') as f: + f.write(bytes(data)) + except Exception as exc: + self.logger.debug(f'Failed to sync E2B file to host {remote_path}: {exc}') + async def stop_session(self, session: BoxSessionInfo): """Kill the E2B sandbox.""" self.logger.info( @@ -316,4 +426,4 @@ def _truncate_output(self, output: str, limit: int = _MAX_RAW_OUTPUT_BYTES) -> s truncated = output[:limit] truncated += f'\n... [output clipped at {limit} bytes]' return truncated - return output \ No newline at end of file + return output From 686fcc0d811fcce53927c44ec0e744e9daf4bd45 Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 18 May 2026 22:54:32 +0800 Subject: [PATCH 28/34] fix(box): support installed nsjail CLI --- src/langbot_plugin/box/nsjail_backend.py | 77 +++++++++++++++++------- 1 file changed, 56 insertions(+), 21 deletions(-) diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py index 921c8ff..e8eced6 100644 --- a/src/langbot_plugin/box/nsjail_backend.py +++ b/src/langbot_plugin/box/nsjail_backend.py @@ -108,11 +108,12 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: session_dir = self._base_dir / session_dir_name # Per-session writable directories. 
+ root_dir = session_dir / 'root' workspace_dir = session_dir / 'workspace' tmp_dir = session_dir / 'tmp' home_dir = session_dir / 'home' - for d in (workspace_dir, tmp_dir, home_dir): + for d in (root_dir, workspace_dir, tmp_dir, home_dir): d.mkdir(parents=True, exist_ok=True) # If host_path is specified, we will use it directly instead of the @@ -144,7 +145,10 @@ async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: session_id=spec.session_id, backend_name=self.name, backend_session_id=str(session_dir), - image='host', + # Keep the requested logical image in metadata so runtime session + # reuse sees later specs as compatible. nsjail still executes + # against host-mounted system paths rather than a container image. + image=spec.image, network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, @@ -288,20 +292,19 @@ def _build_nsjail_args( # Mode: one-shot execution. args.extend(['--mode', 'o']) - # Namespace isolation. - args.extend([ - '--clone_newuser', - '--clone_newns', - '--clone_newpid', - '--clone_newipc', - '--clone_newuts', - '--clone_newcgroup', - ]) + # nsjail enables the relevant clone namespaces by default. Some + # versions do not expose positive --clone_new* flags, only disable + # flags, so rely on defaults for broad compatibility. + + # Use a per-session chroot root so nsjail can create mount targets + # without needing write access to the host root. + root_dir = session_dir / 'root' + root_dir.mkdir(parents=True, exist_ok=True) + self._ensure_chroot_mount_targets(root_dir, session, spec) + args.extend(['--chroot', str(root_dir)]) # Network namespace. - if spec.network == BoxNetworkMode.OFF: - args.append('--clone_newnet') - else: + if spec.network != BoxNetworkMode.OFF: args.append('--disable_clone_newnet') # Read-only system mounts. @@ -333,7 +336,7 @@ def _build_nsjail_args( # The actual command. 
quoted_workdir = shlex.quote(spec.workdir) user_cmd = f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {spec.cmd}' - args.extend(['--', 'sh', '-lc', user_cmd]) + args.extend(['--', '/bin/sh', '-lc', user_cmd]) return args @@ -366,25 +369,57 @@ def _build_writable_mounts( if spec.host_path_mode == BoxHostMountMode.READ_ONLY: args.extend(['--bindmount_ro', f'{spec.host_path}:{spec.mount_path}']) else: - args.extend(['--rw_bind', f'{spec.host_path}:{spec.mount_path}']) + args.extend(['--bindmount', f'{spec.host_path}:{spec.mount_path}']) else: workspace_dir = session_dir / 'workspace' - args.extend(['--rw_bind', f'{workspace_dir}:{spec.mount_path}']) + args.extend(['--bindmount', f'{workspace_dir}:{spec.mount_path}']) for mount in spec.extra_mounts: if mount.mode == BoxHostMountMode.READ_ONLY: args.extend(['--bindmount_ro', f'{mount.host_path}:{mount.mount_path}']) elif mount.mode == BoxHostMountMode.READ_WRITE: - args.extend(['--rw_bind', f'{mount.host_path}:{mount.mount_path}']) + args.extend(['--bindmount', f'{mount.host_path}:{mount.mount_path}']) # /tmp and /home are always per-session writable. 
tmp_dir = session_dir / 'tmp' home_dir = session_dir / 'home' - args.extend(['--rw_bind', f'{tmp_dir}:/tmp']) - args.extend(['--rw_bind', f'{home_dir}:/home']) + args.extend(['--bindmount', f'{tmp_dir}:/tmp']) + args.extend(['--bindmount', f'{home_dir}:/home']) return args + def _ensure_chroot_mount_targets( + self, + root_dir: pathlib.Path, + session: BoxSessionInfo, + spec: BoxSpec, + ) -> None: + mount_paths = { + '/proc', + '/dev', + '/tmp', + '/home', + spec.mount_path, + session.mount_path, + } + mount_paths.update(_READONLY_SYSTEM_MOUNTS) + mount_paths.update(_READONLY_ETC_ENTRIES) + for mount in spec.extra_mounts: + mount_paths.add(mount.mount_path) + + for mount_path in mount_paths: + if not mount_path: + continue + target = root_dir / mount_path.lstrip('/') + try: + if os.path.isfile(mount_path): + target.parent.mkdir(parents=True, exist_ok=True) + target.touch(exist_ok=True) + else: + target.mkdir(parents=True, exist_ok=True) + except Exception as exc: + self.logger.debug(f'Failed to prepare nsjail mount target {target}: {exc}') + def _build_resource_limits(self, spec: BoxSpec) -> list[str]: args: list[str] = [] @@ -455,7 +490,7 @@ def _detect_cgroup_v2() -> bool: # A rough heuristic: if the user owns a cgroup directory we're probably # running under systemd user delegation. user_slice = cgroup_mount / f'user.slice/user-{os.getuid()}.slice' - if user_slice.exists(): + if user_slice.exists() and os.access(user_slice, os.W_OK): return True # If running as root (uid 0), cgroup v2 is always usable. 
if os.getuid() == 0: From feed5309c6edd2a6518389a1135ac33c7b127e1b Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 18 May 2026 23:00:42 +0800 Subject: [PATCH 29/34] test(box): cover nsjail CLI compatibility --- tests/box/test_e2b_backend.py | 12 +++-- tests/box/test_nsjail_backend.py | 90 ++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 16 deletions(-) diff --git a/tests/box/test_e2b_backend.py b/tests/box/test_e2b_backend.py index d98969a..0252c35 100644 --- a/tests/box/test_e2b_backend.py +++ b/tests/box/test_e2b_backend.py @@ -160,8 +160,9 @@ async def test_start_session_basic(backend, mock_e2b_module): assert info.backend_name == 'e2b' assert info.session_id == 'sess1' assert info.backend_session_id == 'sandbox-test-123' - # Path should be adapted - assert info.mount_path == '/home/user/workspace' + # Session metadata keeps LangBot's logical mount path so later specs + # with /workspace can reuse the same session. + assert info.mount_path == '/workspace' # Verify AsyncSandbox.create was called with api_key mock_e2b_module.create.assert_called_once() @@ -229,8 +230,9 @@ async def test_start_session_custom_mount_path(backend, mock_e2b_module): info = await backend.start_session(spec) - # Path should be adapted - assert info.mount_path == '/home/user/workspace/myproject' + # Session metadata keeps the logical mount path; command execution adapts + # it to E2B's internal writable path. 
+ assert info.mount_path == '/workspace/myproject' # ── CubeSandbox host-mount metadata ─────────────────────────────────── @@ -477,4 +479,4 @@ def test_check_e2b_available_returns_false_on_import_error(): with mock.patch('builtins.__import__', side_effect=ImportError('No e2b')): result = _check_e2b_available() - assert result is False \ No newline at end of file + assert result is False diff --git a/tests/box/test_nsjail_backend.py b/tests/box/test_nsjail_backend.py index fca4e2b..2a45b19 100644 --- a/tests/box/test_nsjail_backend.py +++ b/tests/box/test_nsjail_backend.py @@ -22,6 +22,7 @@ from langbot_plugin.box.models import ( BoxExecutionStatus, BoxHostMountMode, + BoxMountSpec, BoxNetworkMode, BoxSessionInfo, BoxSpec, @@ -80,6 +81,7 @@ async def test_start_session_creates_directories(backend, tmp_base): session_dir = pathlib.Path(info.backend_session_id) assert session_dir.exists() + assert (session_dir / 'root').is_dir() assert (session_dir / 'workspace').is_dir() assert (session_dir / 'tmp').is_dir() assert (session_dir / 'home').is_dir() @@ -87,7 +89,7 @@ async def test_start_session_creates_directories(backend, tmp_base): assert info.backend_name == 'nsjail' assert info.session_id == 'sess1' - assert info.image == 'host' + assert info.image == spec.image assert info.read_only_rootfs is True @@ -128,31 +130,35 @@ async def test_stop_session_removes_directory(backend, tmp_base): def test_build_nsjail_args_basic(backend, tmp_base): tmp_base.mkdir(parents=True, exist_ok=True) session_dir = tmp_base / 'test_session' - for d in ('workspace', 'tmp', 'home'): + for d in ('root', 'workspace', 'tmp', 'home'): (session_dir / d).mkdir(parents=True) + spec = BoxSpec(session_id='s1', cmd='echo hello', env={'FOO': 'bar'}) session = BoxSessionInfo( session_id='s1', backend_name='nsjail', backend_session_id=str(session_dir), - image='host', + image=spec.image, network=BoxNetworkMode.OFF, created_at='2024-01-01T00:00:00+00:00', last_used_at='2024-01-01T00:00:00+00:00', ) - 
spec = BoxSpec(session_id='s1', cmd='echo hello', env={'FOO': 'bar'}) args = backend._build_nsjail_args(session, spec, session_dir) assert args[0] == 'nsjail' assert '--mode' in args assert args[args.index('--mode') + 1] == 'o' - assert '--clone_newnet' in args + assert '--chroot' in args + assert args[args.index('--chroot') + 1] == str(session_dir / 'root') + assert '--clone_newnet' not in args + assert '--clone_newuser' not in args + assert '--clone_newns' not in args assert '--disable_clone_newnet' not in args assert '--really_quiet' in args # Writable mounts should reference session directories. - rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--rw_bind'] + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] workspace_mount = f'{session_dir}/workspace:/workspace' assert workspace_mount in rw_binds @@ -162,13 +168,18 @@ def test_build_nsjail_args_basic(backend, tmp_base): # Command is the last part after '--'. separator_idx = args.index('--') - assert args[separator_idx + 1] == 'sh' + assert args[separator_idx + 1] == '/bin/sh' + + # Mount target directories are created under the per-session chroot root. 
+ assert (session_dir / 'root' / 'workspace').is_dir() + assert (session_dir / 'root' / 'tmp').is_dir() + assert (session_dir / 'root' / 'home').is_dir() def test_build_nsjail_args_network_on(backend, tmp_base): tmp_base.mkdir(parents=True, exist_ok=True) session_dir = tmp_base / 'test_session_net' - for d in ('workspace', 'tmp', 'home'): + for d in ('root', 'workspace', 'tmp', 'home'): (session_dir / d).mkdir(parents=True) session = BoxSessionInfo( @@ -191,7 +202,7 @@ def test_build_nsjail_args_network_on(backend, tmp_base): def test_build_nsjail_args_host_path_ro(backend, tmp_base): tmp_base.mkdir(parents=True, exist_ok=True) session_dir = tmp_base / 'test_hp' - for d in ('workspace', 'tmp', 'home'): + for d in ('root', 'workspace', 'tmp', 'home'): (session_dir / d).mkdir(parents=True) session = BoxSessionInfo( @@ -221,7 +232,7 @@ def test_build_nsjail_args_host_path_ro(backend, tmp_base): def test_build_nsjail_args_uses_custom_mount_path(backend, tmp_base): tmp_base.mkdir(parents=True, exist_ok=True) session_dir = tmp_base / 'test_custom_mount' - for d in ('workspace', 'tmp', 'home'): + for d in ('root', 'workspace', 'tmp', 'home'): (session_dir / d).mkdir(parents=True) session = BoxSessionInfo( @@ -247,9 +258,44 @@ def test_build_nsjail_args_uses_custom_mount_path(backend, tmp_base): args = backend._build_nsjail_args(session, spec, session_dir) - rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--rw_bind'] + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] assert '/data/project:/project' in rw_binds assert args[args.index('--cwd') + 1] == '/project/src' + assert (session_dir / 'root' / 'project').is_dir() + + +def test_build_nsjail_args_extra_mounts_prepare_targets(backend, tmp_base): + tmp_base.mkdir(parents=True, exist_ok=True) + session_dir = tmp_base / 'test_extra_mount' + for d in ('root', 'workspace', 'tmp', 'home'): + (session_dir / d).mkdir(parents=True) + + session = BoxSessionInfo( + session_id='s5', + 
backend_name='nsjail', + backend_session_id=str(session_dir), + image='host', + network=BoxNetworkMode.OFF, + created_at='2024-01-01T00:00:00+00:00', + last_used_at='2024-01-01T00:00:00+00:00', + ) + spec = BoxSpec( + session_id='s5', + cmd='ls /workspace/.skills/demo', + extra_mounts=[ + BoxMountSpec( + host_path='/data/skills/demo', + mount_path='/workspace/.skills/demo', + mode=BoxHostMountMode.READ_WRITE, + ) + ], + ) + + args = backend._build_nsjail_args(session, spec, session_dir) + + rw_binds = [args[i + 1] for i, a in enumerate(args) if a == '--bindmount'] + assert '/data/skills/demo:/workspace/.skills/demo' in rw_binds + assert (session_dir / 'root' / 'workspace' / '.skills' / 'demo').is_dir() def test_build_resource_limits_cgroup(backend): @@ -346,6 +392,28 @@ def always_exists(self): assert NsjailBackend._detect_cgroup_v2() is True +def test_detect_cgroup_v2_user_slice_must_be_writable(): + orig_exists = pathlib.Path.exists + + def fake_exists(self): + path = str(self) + return path == '/sys/fs/cgroup' or path.endswith('cgroup.controllers') or 'user.slice' in path + + with ( + mock.patch('os.getuid', return_value=1000), + mock.patch.object(pathlib.Path, 'exists', fake_exists), + mock.patch('os.access', return_value=False), + ): + assert NsjailBackend._detect_cgroup_v2() is False + + with ( + mock.patch('os.getuid', return_value=1000), + mock.patch.object(pathlib.Path, 'exists', fake_exists), + mock.patch('os.access', return_value=True), + ): + assert NsjailBackend._detect_cgroup_v2() is True + + # ── cleanup_orphaned_containers ─────────────────────────────────────── @pytest.mark.anyio From 40a97abe1ce34a0dd1ad2d4a7e6b5e1ea54fb7f4 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Mon, 18 May 2026 23:32:56 +0800 Subject: [PATCH 30/34] fix: make file transfer keys unique --- src/langbot_plugin/runtime/io/handler.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/langbot_plugin/runtime/io/handler.py 
b/src/langbot_plugin/runtime/io/handler.py index 1436c94..eecc997 100644 --- a/src/langbot_plugin/runtime/io/handler.py +++ b/src/langbot_plugin/runtime/io/handler.py @@ -16,6 +16,7 @@ import os import hashlib import base64 +import uuid import aiofiles import aiofiles.os import logging @@ -70,11 +71,9 @@ def __init__( @self.action(CommonAction.FILE_CHUNK) async def file_chunk(data: dict[str, Any]) -> ActionResponse: file_key = data["file_key"] - file_length = data["file_length"] chunk_base64 = data["chunk_base64"] chunk_index = data["chunk_index"] chunk_amount = data["chunk_amount"] - chunk_size = data["chunk_size"] # append the chunk to the file async with aiofiles.open( os.path.join(FILE_STORAGE_DIR, file_key), "ab" @@ -268,8 +267,10 @@ def decorator( # ====== file transfer ====== async def send_file(self, file_bytes: bytes, file_extension: str) -> str: """Send a file to the peer, chunk by chunk, in base64.""" - hash_value = hashlib.sha256(file_bytes).hexdigest() - file_key = f"{hash_value}.{file_extension}" + hash_value = hashlib.sha256(file_bytes).hexdigest()[:16] + extension = file_extension.strip(".") + suffix = f".{extension}" if extension else "" + file_key = f"{hash_value}-{uuid.uuid4().hex}{suffix}" file_length = len(file_bytes) chunk_amount = max( 1, (file_length + FILE_CHUNK_LENGTH - 1) // FILE_CHUNK_LENGTH @@ -308,4 +309,7 @@ async def read_local_file(self, file_key: str) -> bytes: return await f.read() async def delete_local_file(self, file_key: str) -> None: - await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key)) + try: + await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key)) + except FileNotFoundError: + return From d3bb239718029361259daea5ccad62bca9271e29 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Tue, 19 May 2026 00:45:35 +0800 Subject: [PATCH 31/34] fix(box): stop individual managed processes --- src/langbot_plugin/box/actions.py | 47 ++--- src/langbot_plugin/box/client.py | 208 +++++++++++++--------- 
src/langbot_plugin/box/runtime.py | 280 +++++++++++++++++++----------- src/langbot_plugin/box/server.py | 232 +++++++++++++++---------- 4 files changed, 472 insertions(+), 295 deletions(-) diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py index 5bed42e..fea4da6 100644 --- a/src/langbot_plugin/box/actions.py +++ b/src/langbot_plugin/box/actions.py @@ -8,26 +8,27 @@ class LangBotToBoxAction(ActionType): """Actions sent from LangBot to the Box runtime.""" - INIT = 'box_init' # Initialize with full box config (highest priority) - HEALTH = 'box_health' - STATUS = 'box_status' - EXEC = 'box_exec' - CREATE_SESSION = 'box_create_session' - GET_SESSION = 'box_get_session' - GET_SESSIONS = 'box_get_sessions' - DELETE_SESSION = 'box_delete_session' - START_MANAGED_PROCESS = 'box_start_managed_process' - GET_MANAGED_PROCESS = 'box_get_managed_process' - GET_BACKEND_INFO = 'box_get_backend_info' - LIST_SKILLS = 'box_list_skills' - GET_SKILL = 'box_get_skill' - CREATE_SKILL = 'box_create_skill' - UPDATE_SKILL = 'box_update_skill' - DELETE_SKILL = 'box_delete_skill' - SCAN_SKILL_DIRECTORY = 'box_scan_skill_directory' - LIST_SKILL_FILES = 'box_list_skill_files' - READ_SKILL_FILE = 'box_read_skill_file' - WRITE_SKILL_FILE = 'box_write_skill_file' - PREVIEW_SKILL_ZIP = 'box_preview_skill_zip' - INSTALL_SKILL_ZIP = 'box_install_skill_zip' - SHUTDOWN = 'box_shutdown' + INIT = "box_init" # Initialize with full box config (highest priority) + HEALTH = "box_health" + STATUS = "box_status" + EXEC = "box_exec" + CREATE_SESSION = "box_create_session" + GET_SESSION = "box_get_session" + GET_SESSIONS = "box_get_sessions" + DELETE_SESSION = "box_delete_session" + START_MANAGED_PROCESS = "box_start_managed_process" + GET_MANAGED_PROCESS = "box_get_managed_process" + STOP_MANAGED_PROCESS = "box_stop_managed_process" + GET_BACKEND_INFO = "box_get_backend_info" + LIST_SKILLS = "box_list_skills" + GET_SKILL = "box_get_skill" + CREATE_SKILL = "box_create_skill" + 
UPDATE_SKILL = "box_update_skill" + DELETE_SKILL = "box_delete_skill" + SCAN_SKILL_DIRECTORY = "box_scan_skill_directory" + LIST_SKILL_FILES = "box_list_skill_files" + READ_SKILL_FILE = "box_read_skill_file" + WRITE_SKILL_FILE = "box_write_skill_file" + PREVIEW_SKILL_ZIP = "box_preview_skill_zip" + INSTALL_SKILL_ZIP = "box_install_skill_zip" + SHUTDOWN = "box_shutdown" diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py index c489008..dc3f78d 100644 --- a/src/langbot_plugin/box/client.py +++ b/src/langbot_plugin/box/client.py @@ -47,10 +47,19 @@ async def delete_session(self, session_id: str) -> None: ... async def create_session(self, spec: BoxSpec) -> dict: ... @abc.abstractmethod - async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> BoxManagedProcessInfo: ... + async def start_managed_process( + self, session_id: str, spec: BoxManagedProcessSpec + ) -> BoxManagedProcessInfo: ... @abc.abstractmethod - async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo: ... + async def get_managed_process( + self, session_id: str, process_id: str = "default" + ) -> BoxManagedProcessInfo: ... + + @abc.abstractmethod + async def stop_managed_process( + self, session_id: str, process_id: str = "default" + ) -> None: ... @abc.abstractmethod async def get_session(self, session_id: str) -> dict: ... 
@@ -79,7 +88,7 @@ async def scan_skill_directory(self, path: str) -> dict: async def list_skill_files( self, name: str, - path: str = '.', + path: str = ".", include_hidden: bool = False, max_entries: int = 200, ) -> dict: @@ -95,8 +104,8 @@ async def preview_skill_zip( self, file_bytes: bytes, filename: str, - source_subdir: str = '', - target_suffix: str = 'upload', + source_subdir: str = "", + target_suffix: str = "upload", ) -> list[dict]: raise NotImplementedError @@ -105,9 +114,9 @@ async def install_skill_zip( file_bytes: bytes, filename: str, source_paths: list[str] | None = None, - source_path: str = '', - source_subdir: str = '', - target_suffix: str = 'upload', + source_path: str = "", + source_subdir: str = "", + target_suffix: str = "upload", ) -> list[dict]: raise NotImplementedError @@ -125,12 +134,12 @@ def _translate_action_error(exc: Exception) -> BoxError: msg = str(exc) _ERROR_PREFIX_MAP: list[tuple[str, type[BoxError]]] = [ - ('BoxValidationError:', BoxValidationError), - ('BoxSessionNotFoundError:', BoxSessionNotFoundError), - ('BoxSessionConflictError:', BoxSessionConflictError), - ('BoxManagedProcessNotFoundError:', BoxManagedProcessNotFoundError), - ('BoxManagedProcessConflictError:', BoxManagedProcessConflictError), - ('BoxBackendUnavailableError:', BoxBackendUnavailableError), + ("BoxValidationError:", BoxValidationError), + ("BoxSessionNotFoundError:", BoxSessionNotFoundError), + ("BoxSessionConflictError:", BoxSessionConflictError), + ("BoxManagedProcessNotFoundError:", BoxManagedProcessNotFoundError), + ("BoxManagedProcessConflictError:", BoxManagedProcessConflictError), + ("BoxBackendUnavailableError:", BoxBackendUnavailableError), ] for prefix, cls in _ERROR_PREFIX_MAP: if prefix in msg: @@ -148,13 +157,15 @@ def __init__(self, logger: logging.Logger): @property def handler(self) -> Handler: if self._handler is None: - raise BoxRuntimeUnavailableError('box runtime not connected') + raise BoxRuntimeUnavailableError("box runtime not 
connected") return self._handler def set_handler(self, handler: Handler) -> None: self._handler = handler - async def _call(self, action: LangBotToBoxAction, data: dict[str, Any], timeout: float = 15.0) -> dict[str, Any]: + async def _call( + self, action: LangBotToBoxAction, data: dict[str, Any], timeout: float = 15.0 + ) -> dict[str, Any]: try: return await self.handler.call_action(action, data, timeout=timeout) except BoxRuntimeUnavailableError: @@ -165,20 +176,22 @@ async def _call(self, action: LangBotToBoxAction, data: dict[str, Any], timeout: async def initialize(self) -> None: try: await self._call(LangBotToBoxAction.HEALTH, {}) - self._logger.info('LangBot Box runtime connected via action RPC.') + self._logger.info("LangBot Box runtime connected via action RPC.") except Exception as exc: - raise BoxRuntimeUnavailableError(f'box runtime unavailable: {exc}') from exc + raise BoxRuntimeUnavailableError(f"box runtime unavailable: {exc}") from exc async def execute(self, spec: BoxSpec) -> BoxExecutionResult: - data = await self._call(LangBotToBoxAction.EXEC, spec.model_dump(mode='json'), timeout=300.0) + data = await self._call( + LangBotToBoxAction.EXEC, spec.model_dump(mode="json"), timeout=300.0 + ) return BoxExecutionResult( - session_id=data['session_id'], - backend_name=data['backend_name'], - status=BoxExecutionStatus(data['status']), - exit_code=data.get('exit_code'), - stdout=data.get('stdout', ''), - stderr=data.get('stderr', ''), - duration_ms=data['duration_ms'], + session_id=data["session_id"], + backend_name=data["backend_name"], + status=BoxExecutionStatus(data["status"]), + exit_code=data.get("exit_code"), + stdout=data.get("stdout", ""), + stderr=data.get("stderr", ""), + duration_ms=data["duration_ms"], ) async def shutdown(self) -> None: @@ -194,138 +207,171 @@ async def get_status(self) -> dict: async def get_sessions(self) -> list[dict]: data = await self._call(LangBotToBoxAction.GET_SESSIONS, {}) - return data['sessions'] + return 
data["sessions"] async def get_session(self, session_id: str) -> dict: - return await self._call(LangBotToBoxAction.GET_SESSION, {'session_id': session_id}) + return await self._call( + LangBotToBoxAction.GET_SESSION, {"session_id": session_id} + ) async def get_backend_info(self) -> dict: return await self._call(LangBotToBoxAction.GET_BACKEND_INFO, {}) async def delete_session(self, session_id: str) -> None: - await self._call(LangBotToBoxAction.DELETE_SESSION, {'session_id': session_id}, timeout=30.0) + await self._call( + LangBotToBoxAction.DELETE_SESSION, {"session_id": session_id}, timeout=30.0 + ) async def create_session(self, spec: BoxSpec) -> dict: - return await self._call(LangBotToBoxAction.CREATE_SESSION, spec.model_dump(mode='json')) + return await self._call( + LangBotToBoxAction.CREATE_SESSION, spec.model_dump(mode="json") + ) - async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> BoxManagedProcessInfo: + async def start_managed_process( + self, session_id: str, spec: BoxManagedProcessSpec + ) -> BoxManagedProcessInfo: data = await self._call( LangBotToBoxAction.START_MANAGED_PROCESS, - {'session_id': session_id, 'spec': spec.model_dump(mode='json')}, + {"session_id": session_id, "spec": spec.model_dump(mode="json")}, ) return BoxManagedProcessInfo.model_validate(data) - async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo: - data = await self._call(LangBotToBoxAction.GET_MANAGED_PROCESS, { - 'session_id': session_id, - 'process_id': process_id, - }) + async def get_managed_process( + self, session_id: str, process_id: str = "default" + ) -> BoxManagedProcessInfo: + data = await self._call( + LangBotToBoxAction.GET_MANAGED_PROCESS, + { + "session_id": session_id, + "process_id": process_id, + }, + ) return BoxManagedProcessInfo.model_validate(data) - def get_managed_process_websocket_url(self, session_id: str, ws_relay_base_url: str, process_id: str = 'default') 
-> str: + async def stop_managed_process( + self, session_id: str, process_id: str = "default" + ) -> None: + await self._call( + LangBotToBoxAction.STOP_MANAGED_PROCESS, + { + "session_id": session_id, + "process_id": process_id, + }, + timeout=30.0, + ) + + def get_managed_process_websocket_url( + self, session_id: str, ws_relay_base_url: str, process_id: str = "default" + ) -> str: base = ws_relay_base_url - if base.startswith('https://'): - scheme = 'wss://' - suffix = base[len('https://') :] - elif base.startswith('http://'): - scheme = 'ws://' - suffix = base[len('http://') :] + if base.startswith("https://"): + scheme = "wss://" + suffix = base[len("https://") :] + elif base.startswith("http://"): + scheme = "ws://" + suffix = base[len("http://") :] else: - scheme = 'ws://' + scheme = "ws://" suffix = base - return f'{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws' + return ( + f"{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws" + ) async def init(self, config: dict) -> None: await self._call(LangBotToBoxAction.INIT, config) async def list_skills(self) -> list[dict]: data = await self._call(LangBotToBoxAction.LIST_SKILLS, {}) - return data['skills'] + return data["skills"] async def get_skill(self, name: str) -> dict | None: - data = await self._call(LangBotToBoxAction.GET_SKILL, {'name': name}) - return data.get('skill') + data = await self._call(LangBotToBoxAction.GET_SKILL, {"name": name}) + return data.get("skill") async def create_skill(self, skill: dict) -> dict: - data = await self._call(LangBotToBoxAction.CREATE_SKILL, {'skill': skill}) - return data['skill'] + data = await self._call(LangBotToBoxAction.CREATE_SKILL, {"skill": skill}) + return data["skill"] async def update_skill(self, name: str, skill: dict) -> dict: - data = await self._call(LangBotToBoxAction.UPDATE_SKILL, {'name': name, 'skill': skill}) - return data['skill'] + data = await self._call( + LangBotToBoxAction.UPDATE_SKILL, {"name": 
name, "skill": skill} + ) + return data["skill"] async def delete_skill(self, name: str) -> None: - await self._call(LangBotToBoxAction.DELETE_SKILL, {'name': name}) + await self._call(LangBotToBoxAction.DELETE_SKILL, {"name": name}) async def scan_skill_directory(self, path: str) -> dict: - return await self._call(LangBotToBoxAction.SCAN_SKILL_DIRECTORY, {'path': path}) + return await self._call(LangBotToBoxAction.SCAN_SKILL_DIRECTORY, {"path": path}) async def list_skill_files( self, name: str, - path: str = '.', + path: str = ".", include_hidden: bool = False, max_entries: int = 200, ) -> dict: return await self._call( LangBotToBoxAction.LIST_SKILL_FILES, { - 'name': name, - 'path': path, - 'include_hidden': include_hidden, - 'max_entries': max_entries, + "name": name, + "path": path, + "include_hidden": include_hidden, + "max_entries": max_entries, }, ) async def read_skill_file(self, name: str, path: str) -> dict: - return await self._call(LangBotToBoxAction.READ_SKILL_FILE, {'name': name, 'path': path}) + return await self._call( + LangBotToBoxAction.READ_SKILL_FILE, {"name": name, "path": path} + ) async def write_skill_file(self, name: str, path: str, content: str) -> dict: return await self._call( LangBotToBoxAction.WRITE_SKILL_FILE, - {'name': name, 'path': path, 'content': content}, + {"name": name, "path": path, "content": content}, ) async def preview_skill_zip( self, file_bytes: bytes, filename: str, - source_subdir: str = '', - target_suffix: str = 'upload', + source_subdir: str = "", + target_suffix: str = "upload", ) -> list[dict]: - file_key = await self.handler.send_file(file_bytes, 'zip') + file_key = await self.handler.send_file(file_bytes, "zip") data = await self._call( LangBotToBoxAction.PREVIEW_SKILL_ZIP, { - 'file_key': file_key, - 'filename': filename, - 'source_subdir': source_subdir, - 'target_suffix': target_suffix, + "file_key": file_key, + "filename": filename, + "source_subdir": source_subdir, + "target_suffix": target_suffix, }, 
timeout=60.0, ) - return data['skills'] + return data["skills"] async def install_skill_zip( self, file_bytes: bytes, filename: str, source_paths: list[str] | None = None, - source_path: str = '', - source_subdir: str = '', - target_suffix: str = 'upload', + source_path: str = "", + source_subdir: str = "", + target_suffix: str = "upload", ) -> list[dict]: - file_key = await self.handler.send_file(file_bytes, 'zip') + file_key = await self.handler.send_file(file_bytes, "zip") data = await self._call( LangBotToBoxAction.INSTALL_SKILL_ZIP, { - 'file_key': file_key, - 'filename': filename, - 'source_paths': source_paths or [], - 'source_path': source_path, - 'source_subdir': source_subdir, - 'target_suffix': target_suffix, + "file_key": file_key, + "filename": filename, + "source_paths": source_paths or [], + "source_path": source_path, + "source_subdir": source_subdir, + "target_suffix": target_suffix, }, timeout=120.0, ) - return data['skills'] + return data["skills"] diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index 91e6869..9f3fdd0 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -8,6 +8,7 @@ import logging import os import uuid +from typing import TYPE_CHECKING from .backend import BaseSandboxBackend, DockerBackend from .nsjail_backend import NsjailBackend @@ -29,6 +30,9 @@ ) from .skill_store import BoxSkillStore +if TYPE_CHECKING: + from .e2b_backend import E2BSandboxBackend + _UTC = dt.timezone.utc _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT = 4000 @@ -53,7 +57,9 @@ def is_running(self) -> bool: class _RuntimeSession: info: BoxSessionInfo lock: asyncio.Lock - managed_processes: dict[str, _ManagedProcess] = dataclasses.field(default_factory=dict) + managed_processes: dict[str, _ManagedProcess] = dataclasses.field( + default_factory=dict + ) class BoxRuntime: @@ -67,12 +73,14 @@ def __init__( # Load configuration from environment variable (passed by LangBot) self._box_config: dict = {} - 
config_json = os.getenv('LANGBOT_BOX_CONFIG', '') + config_json = os.getenv("LANGBOT_BOX_CONFIG", "") if config_json: try: self._box_config = json.loads(config_json) except json.JSONDecodeError: - logger.warning(f'Failed to parse LANGBOT_BOX_CONFIG: {config_json[:100]}') + logger.warning( + f"Failed to parse LANGBOT_BOX_CONFIG: {config_json[:100]}" + ) # Build backend list if backends is None: @@ -90,13 +98,14 @@ def __init__( self.instance_id = uuid.uuid4().hex[:12] self.skill_store = BoxSkillStore(self._box_config) - def _create_e2b_backend(self, logger: logging.Logger) -> 'E2BSandboxBackend | None': + def _create_e2b_backend(self, logger: logging.Logger) -> "E2BSandboxBackend | None": """Create E2B backend if package is installed.""" try: from .e2b_backend import E2BSandboxBackend + return E2BSandboxBackend(logger) except ImportError: - logger.debug('e2b package not installed, E2B backend unavailable') + logger.debug("e2b package not installed, E2B backend unavailable") return None async def initialize(self): @@ -110,7 +119,9 @@ async def initialize(self): try: await self._backend.cleanup_orphaned_containers(self.instance_id) except Exception as exc: - self.logger.warning(f'LangBot Box orphan container cleanup failed: {exc}') + self.logger.warning( + f"LangBot Box orphan container cleanup failed: {exc}" + ) def init(self, config: dict) -> None: """Initialize with full box configuration from LangBot. 
@@ -129,22 +140,22 @@ def _apply_config_to_backends(self, config: dict) -> None: if backend is None: continue backend_config = config.get(backend.name, {}) - if backend_config and hasattr(backend, 'configure'): + if backend_config and hasattr(backend, "configure"): backend.configure(backend_config) async def execute(self, spec: BoxSpec) -> BoxExecutionResult: if not spec.cmd: - raise BoxValidationError('cmd must not be empty') + raise BoxValidationError("cmd must not be empty") session = await self._get_or_create_session(spec) async with session.lock: self.logger.info( - 'LangBot Box execute: ' - f'session_id={spec.session_id} ' - f'backend_session_id={session.info.backend_session_id} ' - f'backend={session.info.backend_name} ' - f'workdir={spec.workdir} ' - f'timeout_sec={spec.timeout_sec}' + "LangBot Box execute: " + f"session_id={spec.session_id} " + f"backend_session_id={session.info.backend_session_id} " + f"backend={session.info.backend_name} " + f"workdir={spec.workdir} " + f"timeout_sec={spec.timeout_sec}" ) result = await (await self._get_backend()).exec(session.info, spec) @@ -174,14 +185,16 @@ async def create_session(self, spec: BoxSpec) -> dict: async def delete_session(self, session_id: str) -> None: async with self._lock: if session_id not in self._sessions: - raise BoxSessionNotFoundError(f'session {session_id} not found') + raise BoxSessionNotFoundError(f"session {session_id} not found") await self._drop_session_locked(session_id) - async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec) -> dict: + async def start_managed_process( + self, session_id: str, spec: BoxManagedProcessSpec + ) -> dict: async with self._lock: runtime_session = self._sessions.get(session_id) if runtime_session is None: - raise BoxSessionNotFoundError(f'session {session_id} not found') + raise BoxSessionNotFoundError(f"session {session_id} not found") async with runtime_session.lock: process_id = spec.process_id @@ -191,8 +204,8 @@ async def 
start_managed_process(self, session_id: str, spec: BoxManagedProcessSp # This happens when LangBot restarts while the Box runtime # keeps the persistent session alive. self.logger.info( - f'LangBot Box terminating stale managed process before restart: ' - f'session_id={session_id} process_id={process_id}' + f"LangBot Box terminating stale managed process before restart: " + f"session_id={session_id} process_id={process_id}" ) await self._terminate_managed_process(existing) del runtime_session.managed_processes[process_id] @@ -208,19 +221,50 @@ async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSp ) runtime_session.managed_processes[process_id] = managed_process runtime_session.info.last_used_at = dt.datetime.now(_UTC) - asyncio.create_task(self._drain_managed_process_stderr(runtime_session.info.session_id, process_id, managed_process)) - asyncio.create_task(self._watch_managed_process(runtime_session.info.session_id, process_id, managed_process)) - return self._managed_process_to_dict(runtime_session.info.session_id, process_id, managed_process) + asyncio.create_task( + self._drain_managed_process_stderr( + runtime_session.info.session_id, process_id, managed_process + ) + ) + asyncio.create_task( + self._watch_managed_process( + runtime_session.info.session_id, process_id, managed_process + ) + ) + return self._managed_process_to_dict( + runtime_session.info.session_id, process_id, managed_process + ) - def get_managed_process(self, session_id: str, process_id: str = 'default') -> dict: + def get_managed_process(self, session_id: str, process_id: str = "default") -> dict: runtime_session = self._sessions.get(session_id) if runtime_session is None: - raise BoxSessionNotFoundError(f'session {session_id} not found') + raise BoxSessionNotFoundError(f"session {session_id} not found") managed_process = runtime_session.managed_processes.get(process_id) if managed_process is None: - raise BoxManagedProcessNotFoundError(f'session {session_id} has 
no managed process with process_id={process_id}') + raise BoxManagedProcessNotFoundError( + f"session {session_id} has no managed process with process_id={process_id}" + ) return self._managed_process_to_dict(session_id, process_id, managed_process) + async def stop_managed_process( + self, session_id: str, process_id: str = "default" + ) -> None: + runtime_session = self._sessions.get(session_id) + if runtime_session is None: + raise BoxSessionNotFoundError(f"session {session_id} not found") + + async with runtime_session.lock: + managed_process = runtime_session.managed_processes.pop(process_id, None) + if managed_process is None: + raise BoxManagedProcessNotFoundError( + f"session {session_id} has no managed process with process_id={process_id}" + ) + await self._terminate_managed_process(managed_process) + runtime_session.info.last_used_at = dt.datetime.now(_UTC) + self.logger.info( + f"LangBot Box managed process stopped: session_id={session_id} process_id={process_id}" + ) + # ── Observability ───────────────────────────────────────────────── async def get_backend_info(self) -> dict: @@ -228,12 +272,12 @@ async def get_backend_info(self) -> dict: self._backend = await self._select_backend() backend = self._backend if backend is None: - return {'name': None, 'available': False} + return {"name": None, "available": False} try: available = await backend.is_available() except Exception: available = False - return {'name': backend.name, 'available': available} + return {"name": backend.name, "available": available} def get_sessions(self) -> list[dict]: return [self._session_to_dict(s.info) for s in self._sessions.values()] @@ -241,27 +285,30 @@ def get_sessions(self) -> list[dict]: def get_session(self, session_id: str) -> dict: runtime_session = self._sessions.get(session_id) if runtime_session is None: - raise BoxSessionNotFoundError(f'session {session_id} not found') + raise BoxSessionNotFoundError(f"session {session_id} not found") result = 
self._session_to_dict(runtime_session.info) if runtime_session.managed_processes: - result['managed_processes'] = { + managed_processes = { pid: self._managed_process_to_dict(session_id, pid, mp) for pid, mp in runtime_session.managed_processes.items() } + result["managed_processes"] = managed_processes + if "default" in managed_processes: + result["managed_process"] = managed_processes["default"] return result async def get_status(self) -> dict: backend_info = await self.get_backend_info() return { - 'backend': backend_info, - 'active_sessions': len(self._sessions), - 'managed_processes': sum( + "backend": backend_info, + "active_sessions": len(self._sessions), + "managed_processes": sum( 1 for runtime_session in self._sessions.values() for mp in runtime_session.managed_processes.values() if mp.is_running ), - 'session_ttl_sec': self.session_ttl_sec, + "session_ttl_sec": self.session_ttl_sec, } async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: @@ -274,10 +321,10 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: backend = await self._get_backend() if not await backend.is_session_alive(existing.info): self.logger.warning( - 'LangBot Box session backend disappeared, recreating: ' - f'session_id={spec.session_id} ' - f'backend_session_id={existing.info.backend_session_id} ' - f'backend={existing.info.backend_name}' + "LangBot Box session backend disappeared, recreating: " + f"session_id={spec.session_id} " + f"backend_session_id={existing.info.backend_session_id} " + f"backend={existing.info.backend_name}" ) await self._drop_session_locked(spec.session_id) existing = None @@ -285,10 +332,10 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: if existing is not None: existing.info.last_used_at = dt.datetime.now(_UTC) self.logger.info( - 'LangBot Box session reused: ' - f'session_id={spec.session_id} ' - f'backend_session_id={existing.info.backend_session_id} ' - 
f'backend={existing.info.backend_name}' + "LangBot Box session reused: " + f"session_id={spec.session_id} " + f"backend_session_id={existing.info.backend_session_id} " + f"backend={existing.info.backend_name}" ) return existing @@ -297,16 +344,16 @@ async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession: runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock()) self._sessions[spec.session_id] = runtime_session self.logger.info( - 'LangBot Box session created: ' - f'session_id={spec.session_id} ' - f'backend_session_id={info.backend_session_id} ' - f'backend={info.backend_name} ' - f'image={info.image} ' - f'network={info.network.value} ' - f'host_path={info.host_path} ' - f'host_path_mode={info.host_path_mode.value} ' - f'mount_path={info.mount_path} ' - f'workspace_quota_mb={info.workspace_quota_mb}' + "LangBot Box session created: " + f"session_id={spec.session_id} " + f"backend_session_id={info.backend_session_id} " + f"backend={info.backend_name} " + f"image={info.image} " + f"network={info.network.value} " + f"host_path={info.host_path} " + f"host_path_mode={info.host_path_mode.value} " + f"mount_path={info.mount_path} " + f"workspace_quota_mb={info.workspace_quota_mb}" ) return runtime_session @@ -315,40 +362,44 @@ async def _get_backend(self) -> BaseSandboxBackend: self._backend = await self._select_backend() if self._backend is None: raise BoxBackendUnavailableError( - 'LangBot Box backend unavailable. Install and start Docker or nsjail before using exec.' + "LangBot Box backend unavailable. Install and start Docker or nsjail before using exec." ) return self._backend # Backends grouped under each top-level box.backend choice. # 'local' picks the first available local container backend (docker → nsjail). - _LOCAL_BACKEND_NAMES = ('docker', 'nsjail') + _LOCAL_BACKEND_NAMES = ("docker", "nsjail") async def _select_backend(self) -> BaseSandboxBackend | None: # Backend override priority: BOX_BACKEND env var > box.backend config. 
# Accepted values: 'local', 'docker', 'nsjail', 'e2b'. 'local' fans out # to a list; everything else must match a single backend name exactly. - configured = (self._box_config.get('backend') or '').strip() - forced = (os.getenv('BOX_BACKEND') or configured or '').strip() - source_label = 'BOX_BACKEND' if os.getenv('BOX_BACKEND') else 'box.backend' + configured = (self._box_config.get("backend") or "").strip() + forced = (os.getenv("BOX_BACKEND") or configured or "").strip() + source_label = "BOX_BACKEND" if os.getenv("BOX_BACKEND") else "box.backend" candidates: list[BaseSandboxBackend] - if forced == 'local': + if forced == "local": candidates = [ - b for b in self.backends if b is not None and b.name in self._LOCAL_BACKEND_NAMES + b + for b in self.backends + if b is not None and b.name in self._LOCAL_BACKEND_NAMES ] if not candidates: self.logger.error( - f'LangBot Box: no local backend registered ' - f'({source_label}={forced})' + f"LangBot Box: no local backend registered " + f"({source_label}={forced})" ) return None elif forced: - candidates = [b for b in self.backends if b is not None and b.name == forced] + candidates = [ + b for b in self.backends if b is not None and b.name == forced + ] if not candidates: available_names = [b.name for b in self.backends if b is not None] self.logger.error( f'LangBot Box backend "{forced}" not found ' - f'({source_label}={forced}, available: {available_names})' + f"({source_label}={forced}, available: {available_names})" ) return None else: @@ -358,19 +409,27 @@ async def _select_backend(self) -> BaseSandboxBackend | None: try: await backend.initialize() if await backend.is_available(): - label = f'{backend.name} (forced via {source_label}={forced})' if forced else backend.name - self.logger.info(f'LangBot Box using backend: {label}') + label = ( + f"{backend.name} (forced via {source_label}={forced})" + if forced + else backend.name + ) + self.logger.info(f"LangBot Box using backend: {label}") return backend except 
Exception as exc: - self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}') + self.logger.warning( + f"LangBot Box backend {backend.name} probe failed: {exc}" + ) if forced: self.logger.error( f'LangBot Box backend "{forced}" probed but not available ' - f'({source_label}={forced})' + f"({source_label}={forced})" ) - self.logger.warning('LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready') + self.logger.warning( + "LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready" + ) return None async def _reap_expired_sessions_locked(self): @@ -399,39 +458,43 @@ async def _drop_session_locked(self, session_id: str): try: self.logger.info( - 'LangBot Box session cleanup: ' - f'session_id={session_id} ' - f'backend_session_id={runtime_session.info.backend_session_id} ' - f'backend={runtime_session.info.backend_name}' + "LangBot Box session cleanup: " + f"session_id={session_id} " + f"backend_session_id={runtime_session.info.backend_session_id} " + f"backend={runtime_session.info.backend_name}" ) await self._backend.stop_session(runtime_session.info) except Exception as exc: - self.logger.warning(f'Failed to clean up box session {session_id}: {exc}') + self.logger.warning(f"Failed to clean up box session {session_id}: {exc}") def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec): _COMPAT_FIELDS = ( - 'network', - 'image', - 'host_path', - 'host_path_mode', - 'mount_path', - 'persistent', - 'cpus', - 'memory_mb', - 'pids_limit', - 'read_only_rootfs', - 'workspace_quota_mb', + "network", + "image", + "host_path", + "host_path_mode", + "mount_path", + "persistent", + "cpus", + "memory_mb", + "pids_limit", + "read_only_rootfs", + "workspace_quota_mb", ) for field in _COMPAT_FIELDS: session_val = getattr(session, field) spec_val = getattr(spec, field) if session_val != spec_val: - display = session_val.value if hasattr(session_val, 'value') else session_val + display = ( + 
session_val.value if hasattr(session_val, "value") else session_val + ) raise BoxSessionConflictError( - f'Box session {spec.session_id} already exists with {field}={display}' + f"Box session {spec.session_id} already exists with {field}={display}" ) - async def _drain_managed_process_stderr(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> None: + async def _drain_managed_process_stderr( + self, session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> None: stream = managed_process.process.stderr if stream is None: return @@ -441,31 +504,44 @@ async def _drain_managed_process_stderr(self, session_id: str, process_id: str, chunk = await stream.readline() if not chunk: break - text = chunk.decode('utf-8', errors='replace').rstrip() + text = chunk.decode("utf-8", errors="replace").rstrip() if not text: continue managed_process.stderr_chunks.append(text) - managed_process.stderr_total_len += len(text) + 1 # +1 for '\n' separator + managed_process.stderr_total_len += ( + len(text) + 1 + ) # +1 for '\n' separator while ( - managed_process.stderr_total_len > _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT + managed_process.stderr_total_len + > _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT and managed_process.stderr_chunks ): removed = managed_process.stderr_chunks.popleft() managed_process.stderr_total_len -= len(removed) + 1 - self.logger.info(f'LangBot Box managed process stderr: session_id={session_id} process_id={process_id} {text}') + self.logger.info( + f"LangBot Box managed process stderr: session_id={session_id} process_id={process_id} {text}" + ) except Exception as exc: - self.logger.warning(f'Failed to drain managed process stderr for {session_id}/{process_id}: {exc}') + self.logger.warning( + f"Failed to drain managed process stderr for {session_id}/{process_id}: {exc}" + ) - async def _watch_managed_process(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> None: + async def _watch_managed_process( + self, 
session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> None: return_code = await managed_process.process.wait() managed_process.exit_code = return_code managed_process.exited_at = dt.datetime.now(_UTC) runtime_session = self._sessions.get(session_id) if runtime_session is not None: runtime_session.info.last_used_at = managed_process.exited_at - self.logger.info(f'LangBot Box managed process exited: session_id={session_id} process_id={process_id} return_code={return_code}') + self.logger.info( + f"LangBot Box managed process exited: session_id={session_id} process_id={process_id} return_code={return_code}" + ) - async def _terminate_managed_process(self, managed_process: _ManagedProcess) -> None: + async def _terminate_managed_process( + self, managed_process: _ManagedProcess + ) -> None: if not managed_process.is_running: return @@ -494,9 +570,15 @@ async def _terminate_managed_process(self, managed_process: _ManagedProcess) -> managed_process.exit_code = process.returncode managed_process.exited_at = dt.datetime.now(_UTC) - def _managed_process_to_dict(self, session_id: str, process_id: str, managed_process: _ManagedProcess) -> dict: - stderr_preview = '\n'.join(managed_process.stderr_chunks) - status = BoxManagedProcessStatus.RUNNING if managed_process.is_running else BoxManagedProcessStatus.EXITED + def _managed_process_to_dict( + self, session_id: str, process_id: str, managed_process: _ManagedProcess + ) -> dict: + stderr_preview = "\n".join(managed_process.stderr_chunks) + status = ( + BoxManagedProcessStatus.RUNNING + if managed_process.is_running + else BoxManagedProcessStatus.EXITED + ) return BoxManagedProcessInfo( session_id=session_id, process_id=process_id, @@ -510,8 +592,8 @@ def _managed_process_to_dict(self, session_id: str, process_id: str, managed_pro exited_at=managed_process.exited_at, exit_code=managed_process.exit_code, stderr_preview=stderr_preview, - ).model_dump(mode='json') + ).model_dump(mode="json") @staticmethod def 
_session_to_dict(info: BoxSessionInfo) -> dict: - return info.model_dump(mode='json') + return info.model_dump(mode="json") diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index f66a858..d648b8d 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -17,7 +17,6 @@ import argparse import asyncio import datetime as dt -import json import logging import sys from typing import Any @@ -41,11 +40,11 @@ from .models import BoxExecutionResult, BoxManagedProcessSpec, BoxSpec from .runtime import BoxRuntime -logger = logging.getLogger('langbot.box.server') +logger = logging.getLogger("langbot.box.server") def _result_to_dict(result: BoxExecutionResult) -> dict: - return result.model_dump(mode='json') + return result.model_dump(mode="json") # ── aiohttp WebSocket → Connection adapter ─────────────────────────── @@ -67,7 +66,7 @@ async def send(self, message: str) -> None: try: await self._ws.send_str(message) except ConnectionResetError: - raise ConnectionClosedError('Connection closed during send') + raise ConnectionClosedError("Connection closed during send") async def receive(self) -> str: msg = await self._ws.receive() @@ -79,8 +78,8 @@ async def receive(self) -> str: web.WSMsgType.CLOSED, web.WSMsgType.ERROR, ): - raise ConnectionClosedError('Connection closed') - raise ConnectionClosedError(f'Unexpected message type: {msg.type}') + raise ConnectionClosedError("Connection closed") + raise ConnectionClosedError(f"Unexpected message type: {msg.type}") async def close(self) -> None: await self._ws.close() @@ -92,7 +91,7 @@ async def close(self) -> None: class BoxServerHandler(Handler): """Server-side handler that registers box actions backed by BoxRuntime.""" - name = 'BoxServerHandler' + name = "BoxServerHandler" def __init__(self, connection: Connection, runtime: BoxRuntime): super().__init__(connection) @@ -119,7 +118,7 @@ async def exec_cmd(data: dict[str, Any]) -> ActionResponse: try: spec = 
BoxSpec.model_validate(data) except pydantic.ValidationError as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") result = await self._runtime.execute(spec) return ActionResponse.success(_result_to_dict(result)) @@ -128,39 +127,50 @@ async def create_session(data: dict[str, Any]) -> ActionResponse: try: spec = BoxSpec.model_validate(data) except pydantic.ValidationError as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") info = await self._runtime.create_session(spec) return ActionResponse.success(info) @self.action(LangBotToBoxAction.GET_SESSION) async def get_session(data: dict[str, Any]) -> ActionResponse: - return ActionResponse.success(self._runtime.get_session(data['session_id'])) + return ActionResponse.success(self._runtime.get_session(data["session_id"])) @self.action(LangBotToBoxAction.GET_SESSIONS) async def get_sessions(data: dict[str, Any]) -> ActionResponse: - return ActionResponse.success({'sessions': self._runtime.get_sessions()}) + return ActionResponse.success({"sessions": self._runtime.get_sessions()}) @self.action(LangBotToBoxAction.DELETE_SESSION) async def delete_session(data: dict[str, Any]) -> ActionResponse: - await self._runtime.delete_session(data['session_id']) - return ActionResponse.success({'deleted': data['session_id']}) + await self._runtime.delete_session(data["session_id"]) + return ActionResponse.success({"deleted": data["session_id"]}) @self.action(LangBotToBoxAction.START_MANAGED_PROCESS) async def start_managed_process(data: dict[str, Any]) -> ActionResponse: - session_id = data['session_id'] + session_id = data["session_id"] try: - spec = BoxManagedProcessSpec.model_validate(data['spec']) + spec = BoxManagedProcessSpec.model_validate(data["spec"]) except pydantic.ValidationError as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return 
ActionResponse.error(f"BoxValidationError: {exc}") info = await self._runtime.start_managed_process(session_id, spec) return ActionResponse.success(info) @self.action(LangBotToBoxAction.GET_MANAGED_PROCESS) async def get_managed_process(data: dict[str, Any]) -> ActionResponse: - return ActionResponse.success(self._runtime.get_managed_process( - data['session_id'], - data.get('process_id', 'default'), - )) + return ActionResponse.success( + self._runtime.get_managed_process( + data["session_id"], + data.get("process_id", "default"), + ) + ) + + @self.action(LangBotToBoxAction.STOP_MANAGED_PROCESS) + async def stop_managed_process(data: dict[str, Any]) -> ActionResponse: + await self._runtime.stop_managed_process( + data["session_id"], data.get("process_id", "default") + ) + return ActionResponse.success( + {"stopped": data.get("process_id", "default")} + ) @self.action(LangBotToBoxAction.GET_BACKEND_INFO) async def get_backend_info(data: dict[str, Any]) -> ActionResponse: @@ -169,110 +179,118 @@ async def get_backend_info(data: dict[str, Any]) -> ActionResponse: @self.action(LangBotToBoxAction.LIST_SKILLS) async def list_skills(data: dict[str, Any]) -> ActionResponse: - return ActionResponse.success({'skills': self._runtime.skill_store.list_skills()}) + return ActionResponse.success( + {"skills": self._runtime.skill_store.list_skills()} + ) @self.action(LangBotToBoxAction.GET_SKILL) async def get_skill(data: dict[str, Any]) -> ActionResponse: - skill = self._runtime.skill_store.get_skill(data['name']) - return ActionResponse.success({'skill': skill}) + skill = self._runtime.skill_store.get_skill(data["name"]) + return ActionResponse.success({"skill": skill}) @self.action(LangBotToBoxAction.CREATE_SKILL) async def create_skill(data: dict[str, Any]) -> ActionResponse: try: - skill = self._runtime.skill_store.create_skill(data['skill']) + skill = self._runtime.skill_store.create_skill(data["skill"]) except Exception as exc: - return 
ActionResponse.error(f'BoxValidationError: {exc}') - return ActionResponse.success({'skill': skill}) + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skill": skill}) @self.action(LangBotToBoxAction.UPDATE_SKILL) async def update_skill(data: dict[str, Any]) -> ActionResponse: try: - skill = self._runtime.skill_store.update_skill(data['name'], data['skill']) + skill = self._runtime.skill_store.update_skill( + data["name"], data["skill"] + ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') - return ActionResponse.success({'skill': skill}) + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skill": skill}) @self.action(LangBotToBoxAction.DELETE_SKILL) async def delete_skill(data: dict[str, Any]) -> ActionResponse: try: - result = self._runtime.skill_store.delete_skill(data['name']) + result = self._runtime.skill_store.delete_skill(data["name"]) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") return ActionResponse.success(result) @self.action(LangBotToBoxAction.SCAN_SKILL_DIRECTORY) async def scan_skill_directory(data: dict[str, Any]) -> ActionResponse: try: - skill = self._runtime.skill_store.scan_directory(data['path']) + skill = self._runtime.skill_store.scan_directory(data["path"]) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") return ActionResponse.success(skill) @self.action(LangBotToBoxAction.LIST_SKILL_FILES) async def list_skill_files(data: dict[str, Any]) -> ActionResponse: try: result = self._runtime.skill_store.list_skill_files( - data['name'], - data.get('path', '.'), - include_hidden=bool(data.get('include_hidden', False)), - max_entries=int(data.get('max_entries', 200)), + data["name"], + data.get("path", "."), + 
include_hidden=bool(data.get("include_hidden", False)), + max_entries=int(data.get("max_entries", 200)), ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") return ActionResponse.success(result) @self.action(LangBotToBoxAction.READ_SKILL_FILE) async def read_skill_file(data: dict[str, Any]) -> ActionResponse: try: - result = self._runtime.skill_store.read_skill_file(data['name'], data['path']) + result = self._runtime.skill_store.read_skill_file( + data["name"], data["path"] + ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") return ActionResponse.success(result) @self.action(LangBotToBoxAction.WRITE_SKILL_FILE) async def write_skill_file(data: dict[str, Any]) -> ActionResponse: try: - result = self._runtime.skill_store.write_skill_file(data['name'], data['path'], data.get('content', '')) + result = self._runtime.skill_store.write_skill_file( + data["name"], data["path"], data.get("content", "") + ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') + return ActionResponse.error(f"BoxValidationError: {exc}") return ActionResponse.success(result) @self.action(LangBotToBoxAction.PREVIEW_SKILL_ZIP) async def preview_skill_zip(data: dict[str, Any]) -> ActionResponse: try: - file_bytes = await self.read_local_file(data['file_key']) - await self.delete_local_file(data['file_key']) + file_bytes = await self.read_local_file(data["file_key"]) + await self.delete_local_file(data["file_key"]) result = self._runtime.skill_store.preview_zip_upload( file_bytes=file_bytes, - filename=data.get('filename', 'skill.zip'), - source_subdir=data.get('source_subdir') or '', - target_suffix=data.get('target_suffix', 'upload'), + filename=data.get("filename", "skill.zip"), + source_subdir=data.get("source_subdir") or "", + target_suffix=data.get("target_suffix", 
"upload"), ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') - return ActionResponse.success({'skills': result}) + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skills": result}) @self.action(LangBotToBoxAction.INSTALL_SKILL_ZIP) async def install_skill_zip(data: dict[str, Any]) -> ActionResponse: try: - file_bytes = await self.read_local_file(data['file_key']) - await self.delete_local_file(data['file_key']) + file_bytes = await self.read_local_file(data["file_key"]) + await self.delete_local_file(data["file_key"]) result = self._runtime.skill_store.install_zip_upload( file_bytes=file_bytes, - filename=data.get('filename', 'skill.zip'), - source_paths=data.get('source_paths') or [], - source_path=data.get('source_path') or '', - source_subdir=data.get('source_subdir') or '', - target_suffix=data.get('target_suffix', 'upload'), + filename=data.get("filename", "skill.zip"), + source_paths=data.get("source_paths") or [], + source_path=data.get("source_path") or "", + source_subdir=data.get("source_subdir") or "", + target_suffix=data.get("target_suffix", "upload"), ) except Exception as exc: - return ActionResponse.error(f'BoxValidationError: {exc}') - return ActionResponse.success({'skills': result}) + return ActionResponse.error(f"BoxValidationError: {exc}") + return ActionResponse.success({"skills": result}) @self.action(LangBotToBoxAction.INIT) async def init(data: dict[str, Any]) -> ActionResponse: self._runtime.init(data) - return ActionResponse.success({'initialized': True}) + return ActionResponse.success({"initialized": True}) @self.action(LangBotToBoxAction.SHUTDOWN) async def shutdown(data: dict[str, Any]) -> ActionResponse: @@ -285,29 +303,37 @@ async def shutdown(data: dict[str, Any]) -> ActionResponse: def _error_response(exc: Exception) -> web.Response: return web.json_response( - {'error': {'code': type(exc).__name__, 'message': str(exc)}}, + {"error": {"code": 
type(exc).__name__, "message": str(exc)}}, status=400, ) async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse: - runtime: BoxRuntime = request.app['runtime'] - session_id = request.match_info['session_id'] - process_id = request.match_info.get('process_id', 'default') + runtime: BoxRuntime = request.app["runtime"] + session_id = request.match_info["session_id"] + process_id = request.match_info.get("process_id", "default") runtime_session = runtime._sessions.get(session_id) if runtime_session is None: - return _error_response(BoxSessionNotFoundError(f'session {session_id} not found')) + return _error_response( + BoxSessionNotFoundError(f"session {session_id} not found") + ) managed_process = runtime_session.managed_processes.get(process_id) if managed_process is None: - return _error_response(BoxManagedProcessNotFoundError(f'session {session_id} has no managed process with process_id={process_id}')) + return _error_response( + BoxManagedProcessNotFoundError( + f"session {session_id} has no managed process with process_id={process_id}" + ) + ) if not managed_process.is_running: return _error_response( - BoxManagedProcessConflictError(f'managed process {process_id} in session {session_id} is not running') + BoxManagedProcessConflictError( + f"managed process {process_id} in session {session_id} is not running" + ) ) - ws = web.WebSocketResponse(protocols=('mcp',)) + ws = web.WebSocketResponse(protocols=("mcp",)) await ws.prepare(request) async with managed_process.attach_lock: @@ -315,7 +341,7 @@ async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse: stdout = process.stdout stdin = process.stdin if stdout is None or stdin is None: - await ws.close(message=b'managed process stdio unavailable') + await ws.close(message=b"managed process stdio unavailable") return ws async def _stdout_to_ws() -> None: @@ -323,13 +349,13 @@ async def _stdout_to_ws() -> None: line = await stdout.readline() if not line: break - await 
ws.send_str(line.decode('utf-8', errors='replace').rstrip('\n')) + await ws.send_str(line.decode("utf-8", errors="replace").rstrip("\n")) runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc) async def _ws_to_stdin() -> None: async for msg in ws: if msg.type == web.WSMsgType.TEXT: - stdin.write((msg.data + '\n').encode('utf-8')) + stdin.write((msg.data + "\n").encode("utf-8")) await stdin.drain() runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc) elif msg.type in ( @@ -362,7 +388,7 @@ async def _ws_to_stdin() -> None: async def handle_rpc_ws(request: web.Request) -> web.StreamResponse: """Handle action RPC over a single aiohttp WebSocket connection.""" - runtime: BoxRuntime = request.app['runtime'] + runtime: BoxRuntime = request.app["runtime"] ws = web.WebSocketResponse() await ws.prepare(request) @@ -380,14 +406,27 @@ async def handle_rpc_ws(request: web.Request) -> web.StreamResponse: def create_app(runtime: BoxRuntime) -> web.Application: """Create the aiohttp app with all WebSocket routes on a single port.""" app = web.Application() - app['runtime'] = runtime - app.router.add_get('/rpc/ws', handle_rpc_ws) - app.router.add_get('/v1/sessions/{session_id}/managed-process/{process_id}/ws', handle_managed_process_ws) + app["runtime"] = runtime + app.router.add_get("/rpc/ws", handle_rpc_ws) + app.router.add_get( + "/v1/sessions/{session_id}/managed-process/{process_id}/ws", + handle_managed_process_ws, + ) # Backward-compatible route (defaults to process_id='default') - app.router.add_get('/v1/sessions/{session_id}/managed-process/ws', handle_managed_process_ws) + app.router.add_get( + "/v1/sessions/{session_id}/managed-process/ws", handle_managed_process_ws + ) return app +def create_ws_relay_app(runtime: BoxRuntime) -> web.Application: + """Backward-compatible alias for older callers. + + The relay and action RPC endpoints now live in one aiohttp app. 
+ """ + return create_app(runtime) + + # ── Entry point ────────────────────────────────────────────────────── @@ -404,14 +443,16 @@ async def _run_server(host: str, port: int, mode: str) -> None: await runner.setup() site = web.TCPSite(runner, host, port) await site.start() - logger.info(f'Box server listening on {host}:{port}') + logger.info(f"Box server listening on {host}:{port}") except OSError as exc: - logger.warning(f'Box server failed to bind {host}:{port}: {exc}') - logger.warning('Managed process WebSocket attach will be unavailable.') + logger.warning(f"Box server failed to bind {host}:{port}: {exc}") + logger.warning("Managed process WebSocket attach will be unavailable.") try: - if mode == 'stdio': - from langbot_plugin.runtime.io.controllers.stdio.server import StdioServerController + if mode == "stdio": + from langbot_plugin.runtime.io.controllers.stdio.server import ( + StdioServerController, + ) async def new_connection_callback(connection: Connection) -> None: handler = BoxServerHandler(connection, runtime) @@ -422,7 +463,7 @@ async def new_connection_callback(connection: Connection) -> None: else: # In ws mode, action RPC is served via aiohttp on /rpc/ws. # Keep the server alive until cancelled. 
- logger.info(f'Box action RPC available at ws://{host}:{port}/rpc/ws') + logger.info(f"Box action RPC available at ws://{host}:{port}/rpc/ws") stop_event = asyncio.Event() await stop_event.wait() finally: @@ -432,29 +473,36 @@ async def new_connection_callback(connection: Connection) -> None: def main(argv: list[str] | None = None) -> None: - parser = argparse.ArgumentParser(description='LangBot Box Runtime Service') - parser.add_argument('--host', default='0.0.0.0', help='Bind address') - parser.add_argument('--ws-control-port', type=int, default=5410, help='The port for control connection') + parser = argparse.ArgumentParser(description="LangBot Box Runtime Service") + parser.add_argument("--host", default="0.0.0.0", help="Bind address") + parser.add_argument( + "--ws-control-port", + type=int, + default=5410, + help="The port for control connection", + ) parser.add_argument( - '--port', + "--port", type=int, - dest='ws_control_port', + dest="ws_control_port", help=argparse.SUPPRESS, ) - parser.add_argument('--stdio-control', action='store_true', help='Use stdio for control connection') parser.add_argument( - '--mode', - choices=['auto', 'stdio', 'ws'], + "--stdio-control", action="store_true", help="Use stdio for control connection" + ) + parser.add_argument( + "--mode", + choices=["auto", "stdio", "ws"], help=argparse.SUPPRESS, ) args = parser.parse_args(argv) - stdio_control = args.stdio_control or args.mode == 'stdio' - control_mode = 'stdio' if stdio_control else 'ws' + stdio_control = args.stdio_control or args.mode == "stdio" + control_mode = "stdio" if stdio_control else "ws" configure_process_logging(stream=sys.stderr) asyncio.run(_run_server(args.host, args.ws_control_port, control_mode)) -if __name__ == '__main__': +if __name__ == "__main__": main() From 0360c655d9f47dd08a96f208de2c8ff7d766a053 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 21 May 2026 13:20:40 +0800 Subject: [PATCH 32/34] refactor(box): consolidate box entry point to the lbp 
CLI subcommand Box previously had three launch paths while the plugin runtime had one. Align box with the runtime so both are launched exclusively via the lbp CLI subcommand: - Delete box/__main__.py (removes `python -m langbot_plugin.box`). - Remove the `if __name__ == "__main__"` guard from box/server.py (removes `python -m langbot_plugin.box.server`). - server.main() now takes the parsed argparse.Namespace, mirroring how `lbp rt` drives langbot_plugin.runtime.app.main; the box subparser in langbot_plugin.cli is the single source of the argument schema. - Drop the legacy hidden `--mode {auto,stdio,ws}` flag and the hidden `--port` alias of `--ws-control-port`. Mode is selected solely by `-s`/`--stdio-control` (WebSocket by default), identical to `lbp rt`. The box CLI surface is now --host, -s/--stdio-control, --ws-control-port. --- src/langbot_plugin/box/__main__.py | 7 ---- src/langbot_plugin/box/server.py | 56 +++++++++++------------------- src/langbot_plugin/cli/__init__.py | 19 ++++------ 3 files changed, 27 insertions(+), 55 deletions(-) delete mode 100644 src/langbot_plugin/box/__main__.py diff --git a/src/langbot_plugin/box/__main__.py b/src/langbot_plugin/box/__main__.py deleted file mode 100644 index 6c41643..0000000 --- a/src/langbot_plugin/box/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Allow running the Box server via ``python -m langbot_plugin.box``.""" - -from .server import main - -if __name__ == "__main__": - main() - diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py index d648b8d..601b028 100644 --- a/src/langbot_plugin/box/server.py +++ b/src/langbot_plugin/box/server.py @@ -1,10 +1,16 @@ -"""Standalone Box Runtime service exposing BoxRuntime via action RPC. +"""Box Runtime service exposing BoxRuntime via action RPC. -Usage (ws, standalone/manual mode): - python -m langbot_plugin.box.server +This module is the implementation of the `box` CLI subcommand. 
The only +supported entry point is the `lbp` CLI, which mirrors the plugin runtime's +`rt` subcommand: -Usage (stdio, launched by LangBot as subprocess): - python -m langbot_plugin.box.server --stdio-control + lbp box # WebSocket control transport (default) + lbp box -s # stdio control transport + +`main()` is invoked by the CLI with the parsed argument namespace, exactly +as `lbp rt` drives ``langbot_plugin.runtime.app.main``. There is no +``python -m langbot_plugin.box`` / ``python -m langbot_plugin.box.server`` +launch path. All WebSocket endpoints share a single port (default 5410): /rpc/ws — Action RPC (control channel) @@ -472,37 +478,17 @@ async def new_connection_callback(connection: Connection) -> None: await runner.cleanup() -def main(argv: list[str] | None = None) -> None: - parser = argparse.ArgumentParser(description="LangBot Box Runtime Service") - parser.add_argument("--host", default="0.0.0.0", help="Bind address") - parser.add_argument( - "--ws-control-port", - type=int, - default=5410, - help="The port for control connection", - ) - parser.add_argument( - "--port", - type=int, - dest="ws_control_port", - help=argparse.SUPPRESS, - ) - parser.add_argument( - "--stdio-control", action="store_true", help="Use stdio for control connection" - ) - parser.add_argument( - "--mode", - choices=["auto", "stdio", "ws"], - help=argparse.SUPPRESS, - ) - args = parser.parse_args(argv) +def main(args: argparse.Namespace) -> None: + """Run the Box runtime service. - stdio_control = args.stdio_control or args.mode == "stdio" - control_mode = "stdio" if stdio_control else "ws" + Invoked by the `box` CLI subcommand with the parsed argument namespace, + mirroring how `lbp rt` drives ``langbot_plugin.runtime.app.main``. The + argument schema is defined once, on the `box` subparser in + ``langbot_plugin.cli``. + """ + # Mode selection mirrors the plugin runtime (`lbp rt`): WebSocket by + # default, stdio when `-s`/`--stdio-control` is passed. 
+ control_mode = "stdio" if args.stdio_control else "ws" configure_process_logging(stream=sys.stderr) asyncio.run(_run_server(args.host, args.ws_control_port, control_mode)) - - -if __name__ == "__main__": - main() diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py index f0cf4e6..dc48f27 100644 --- a/src/langbot_plugin/cli/__init__.py +++ b/src/langbot_plugin/cli/__init__.py @@ -130,7 +130,10 @@ def main(): "--host", default="0.0.0.0", help="Bind address" ) box_parser.add_argument( - "--stdio-control", action="store_true", help="Use stdio for control connection" + "-s", + "--stdio-control", + action="store_true", + help="Use stdio for control connection", ) box_parser.add_argument( "--ws-control-port", @@ -138,17 +141,6 @@ def main(): default=5410, help="The port for control connection", ) - box_parser.add_argument( - "--port", - type=int, - dest="ws_control_port", - help=argparse.SUPPRESS, - ) - box_parser.add_argument( - "--mode", - choices=["auto", "stdio", "ws"], - help=argparse.SUPPRESS, - ) args = parser.parse_args() @@ -180,7 +172,8 @@ def main(): runtime_app.main(args) case "box": from langbot_plugin.box.server import main as box_main - box_main(sys.argv[2:]) + + box_main(args) case _: cli_print("unknown_command", args.command) sys.exit(1) From 23cd80d09193eb62624883aaadf63aa51f88a7c0 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 21 May 2026 13:22:24 +0800 Subject: [PATCH 33/34] chore: bump beta 1 version --- pyproject.toml | 2 +- src/langbot_plugin/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ed55da6..b16c25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langbot-plugin" -version = "0.3.10" +version = "0.4.0-beta.1" description = "This package contains the SDK, CLI for building plugins for LangBot, plus the runtime for hosting LangBot plugins" readme = "README.md" authors = [ diff --git 
a/src/langbot_plugin/version.py b/src/langbot_plugin/version.py index d7b30e1..3bf78d8 100644 --- a/src/langbot_plugin/version.py +++ b/src/langbot_plugin/version.py @@ -1 +1 @@ -__version__ = "0.3.6" +__version__ = "0.4.0-beta.1" From 667a27480285360e2cf2a0f8900795a91bd644f0 Mon Sep 17 00:00:00 2001 From: RockChinQ Date: Fri, 22 May 2026 05:41:39 -0400 Subject: [PATCH 34/34] fix: use box backend config only --- src/langbot_plugin/box/runtime.py | 9 +++-- tests/box/test_backend_selection.py | 54 ++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py index 9f3fdd0..7550816 100644 --- a/src/langbot_plugin/box/runtime.py +++ b/src/langbot_plugin/box/runtime.py @@ -371,12 +371,11 @@ async def _get_backend(self) -> BaseSandboxBackend: _LOCAL_BACKEND_NAMES = ("docker", "nsjail") async def _select_backend(self) -> BaseSandboxBackend | None: - # Backend override priority: BOX_BACKEND env var > box.backend config. + # Backend selection comes from box.backend only. # Accepted values: 'local', 'docker', 'nsjail', 'e2b'. 'local' fans out - # to a list; everything else must match a single backend name exactly. - configured = (self._box_config.get("backend") or "").strip() - forced = (os.getenv("BOX_BACKEND") or configured or "").strip() - source_label = "BOX_BACKEND" if os.getenv("BOX_BACKEND") else "box.backend" + # to local container backends; everything else must match one backend exactly. 
+ forced = (self._box_config.get("backend") or "").strip() + source_label = "box.backend" candidates: list[BaseSandboxBackend] if forced == "local": diff --git a/tests/box/test_backend_selection.py b/tests/box/test_backend_selection.py index 48204b6..2439394 100644 --- a/tests/box/test_backend_selection.py +++ b/tests/box/test_backend_selection.py @@ -100,18 +100,19 @@ def test_e2b_import_failure_returns_none(logger): assert len(active_backends) >= 2 -# ── BOX_BACKEND environment variable ─────────────────────────────────── +# ── box.backend configuration ────────────────────────────────────────── @pytest.mark.anyio -async def test_box_backend_forces_specific_backend(logger): - """BOX_BACKEND env var forces selection of named backend.""" +async def test_box_backend_config_forces_specific_backend(logger): + """box.backend config forces selection of named backend.""" backend_e2b = MockBackend(logger, 'e2b', available=True) backend_docker = MockBackend(logger, 'docker', available=True) backend_nsjail = MockBackend(logger, 'nsjail', available=False) runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail]) + runtime.init({'backend': 'docker'}) - with mock.patch('os.getenv', side_effect=lambda k: 'docker' if k == 'BOX_BACKEND' else None): + with mock.patch('os.getenv', return_value=None): selected = await runtime._select_backend() assert selected.name == 'docker' @@ -119,52 +120,70 @@ async def test_box_backend_forces_specific_backend(logger): @pytest.mark.anyio -async def test_box_backend_unavailable_returns_none(logger): - """When BOX_BACKEND specifies unavailable backend, returns None.""" +async def test_box_backend_config_unavailable_returns_none(logger): + """When box.backend specifies unavailable backend, returns None.""" backend_e2b = MockBackend(logger, 'e2b', available=False) backend_docker = MockBackend(logger, 'docker', available=True) runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + runtime.init({'backend': 
'e2b'}) - with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + with mock.patch('os.getenv', return_value=None): selected = await runtime._select_backend() assert selected is None @pytest.mark.anyio -async def test_box_backend_not_found_returns_none(logger): - """When BOX_BACKEND specifies unknown backend name, returns None.""" +async def test_box_backend_config_not_found_returns_none(logger): + """When box.backend specifies unknown backend name, returns None.""" backend_docker = MockBackend(logger, 'docker', available=True) runtime = BoxRuntime(logger, backends=[backend_docker]) + runtime.init({'backend': 'unknown'}) - with mock.patch('os.getenv', side_effect=lambda k: 'unknown' if k == 'BOX_BACKEND' else None): + with mock.patch('os.getenv', return_value=None): selected = await runtime._select_backend() assert selected is None @pytest.mark.anyio -async def test_box_backend_no_fallback(logger): - """When BOX_BACKEND is set but backend unavailable, does NOT fallback.""" +async def test_box_backend_config_no_fallback(logger): + """When box.backend is set but backend unavailable, does NOT fallback.""" backend_e2b = MockBackend(logger, 'e2b', available=False) backend_docker = MockBackend(logger, 'docker', available=True) runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker]) + runtime.init({'backend': 'e2b'}) - with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + with mock.patch('os.getenv', return_value=None): selected = await runtime._select_backend() # Should return None, not fallback to docker assert selected is None +@pytest.mark.anyio +async def test_box_backend_env_var_is_ignored(logger): + """BOX_BACKEND is not an independent override; use box.backend instead.""" + backend_docker = MockBackend(logger, 'docker', available=True) + backend_e2b = MockBackend(logger, 'e2b', available=True) + + runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b]) + 
runtime.init({'backend': 'docker'}) + + with mock.patch('os.getenv', side_effect=lambda k: 'e2b' if k == 'BOX_BACKEND' else None): + selected = await runtime._select_backend() + + assert selected is backend_docker + + # ── Auto-detect backend selection ─────────────────────────────────────── @pytest.mark.anyio async def test_auto_detect_first_available(logger): - """Without BOX_BACKEND, selects first available backend.""" + """Without box.backend, selects first available backend.""" backend_e2b = MockBackend(logger, 'e2b', available=False) backend_docker = MockBackend(logger, 'docker', available=True) backend_nsjail = MockBackend(logger, 'nsjail', available=False) @@ -242,14 +261,15 @@ def test_custom_backends_list_preserved(logger): @pytest.mark.anyio -async def test_custom_backends_with_box_backend(logger): - """BOX_BACKEND works with custom backends list.""" +async def test_custom_backends_with_box_backend_config(logger): + """box.backend works with custom backends list.""" backend_a = MockBackend(logger, 'a', available=True) backend_b = MockBackend(logger, 'b', available=True) runtime = BoxRuntime(logger, backends=[backend_a, backend_b]) + runtime.init({'backend': 'b'}) - with mock.patch('os.getenv', side_effect=lambda k: 'b' if k == 'BOX_BACKEND' else None): + with mock.patch('os.getenv', return_value=None): selected = await runtime._select_backend() assert selected.name == 'b'