diff --git a/.gitignore b/.gitignore
index 7c011af..5498a13 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,6 +174,7 @@ cython_debug/
 .pypirc
 
 /data/plugins/
+/data/box/
 /debug/
 uv.lock
 src/.DS_Store
diff --git a/pyproject.toml b/pyproject.toml
index 0631261..b16c25c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langbot-plugin"
-version = "0.3.7"
+version = "0.4.0-beta.1"
 description = "This package contains the SDK, CLI for building plugins for LangBot, plus the runtime for hosting LangBot plugins"
 readme = "README.md"
 authors = [
@@ -9,7 +9,9 @@ authors = [
 requires-python = ">=3.10"
 dependencies = [
     "aiofiles>=24.1.0",
+    "aiohttp>=3.9.0",
     "dotenv>=0.9.9",
+    "e2b>=2.15",
     "httpx>=0.28.1",
     "jinja2>=3.1.6",
     "pip>=25.2",
diff --git a/src/langbot_plugin/box/__init__.py b/src/langbot_plugin/box/__init__.py
new file mode 100644
index 0000000..5bd86a4
--- /dev/null
+++ b/src/langbot_plugin/box/__init__.py
@@ -0,0 +1,5 @@
+"""LangBot Box runtime package."""
+
+from .client import BoxRuntimeClient, ActionRPCBoxClient
+
+__all__ = ['BoxRuntimeClient', 'ActionRPCBoxClient']
diff --git a/src/langbot_plugin/box/actions.py b/src/langbot_plugin/box/actions.py
new file mode 100644
index 0000000..fea4da6
--- /dev/null
+++ b/src/langbot_plugin/box/actions.py
@@ -0,0 +1,34 @@
+"""Box-specific action types for the action RPC protocol."""
+
+from __future__ import annotations
+
+from langbot_plugin.entities.io.actions.enums import ActionType
+
+
+class LangBotToBoxAction(ActionType):
+    """Actions sent from LangBot to the Box runtime."""
+
+    INIT = "box_init"  # Initialize with full box config (highest priority)
+    HEALTH = "box_health"  # sent by ActionRPCBoxClient.initialize() to confirm the runtime answers
+    STATUS = "box_status"
+    EXEC = "box_exec"  # payload is a JSON-dumped BoxSpec (see ActionRPCBoxClient.execute)
+    CREATE_SESSION = "box_create_session"  # --- session lifecycle ---
+    GET_SESSION = "box_get_session"
+    GET_SESSIONS = "box_get_sessions"
+    DELETE_SESSION = "box_delete_session"
+    START_MANAGED_PROCESS = "box_start_managed_process"  # --- managed processes ---
+    GET_MANAGED_PROCESS = "box_get_managed_process"
+    STOP_MANAGED_PROCESS = "box_stop_managed_process"
+    GET_BACKEND_INFO = "box_get_backend_info"
+    LIST_SKILLS = "box_list_skills"  # --- skill management ---
+    GET_SKILL = "box_get_skill"
+    CREATE_SKILL = "box_create_skill"
+    UPDATE_SKILL = "box_update_skill"
+    DELETE_SKILL = "box_delete_skill"
+    SCAN_SKILL_DIRECTORY = "box_scan_skill_directory"
+    LIST_SKILL_FILES = "box_list_skill_files"
+    READ_SKILL_FILE = "box_read_skill_file"
+    WRITE_SKILL_FILE = "box_write_skill_file"
+    PREVIEW_SKILL_ZIP = "box_preview_skill_zip"  # the client uploads the ZIP first and passes a file_key
+    INSTALL_SKILL_ZIP = "box_install_skill_zip"  # same upload-then-file_key convention as the preview
+    SHUTDOWN = "box_shutdown"  # sent best-effort by ActionRPCBoxClient.shutdown()
diff --git a/src/langbot_plugin/box/backend.py b/src/langbot_plugin/box/backend.py
new file mode 100644
index 0000000..37ffbe3
--- /dev/null
+++ b/src/langbot_plugin/box/backend.py
@@ -0,0 +1,411 @@
+from __future__ import annotations
+
+import abc
+import asyncio
+import dataclasses
+import datetime as dt
+import logging
+import re
+import shlex
+import shutil
+import uuid
+
+from .errors import BoxError
+from .models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
+from .security import validate_sandbox_security
+
+# Hard cap on raw subprocess output to prevent unbounded memory usage.
+# Container timeout already bounds duration, but fast commands can still
+# produce large output within the time limit.  After this many bytes the
+# remaining output is discarded before decoding.
+_MAX_RAW_OUTPUT_BYTES = 1_048_576  # 1 MB per stream
+
+
+@dataclasses.dataclass(slots=True)
+class _CommandResult:  # outcome of one CLI invocation made through CLISandboxBackend._run_command
+    return_code: int  # process exit status; -1 when the command timed out
+    stdout: str  # decoded (UTF-8, errors replaced), stripped and size-clipped output
+    stderr: str  # same treatment as stdout
+    timed_out: bool = False  # True when the process was killed after exceeding its timeout
+
+
+class BaseSandboxBackend(abc.ABC):
+    """Abstract contract every sandbox backend implements."""
+    name: str  # backend label; interpolated into error messages
+    instance_id: str = ''
+
+    def __init__(self, logger: logging.Logger):
+        self.logger = logger
+
+    async def initialize(self):
+        """Optional async setup hook; the base implementation does nothing."""
+
+    @abc.abstractmethod
+    async def is_available(self) -> bool:
+        """Report whether this backend can currently be used."""
+
+    @abc.abstractmethod
+    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
+        """Start a session for *spec* and return its description."""
+
+    @abc.abstractmethod
+    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
+        """Run the command described by *spec* inside *session*."""
+
+    @abc.abstractmethod
+    async def stop_session(self, session: BoxSessionInfo):
+        """Stop *session*."""
+
+    async def is_session_alive(self, session: BoxSessionInfo) -> bool:
+        return True  # default: sessions are assumed alive unless a backend overrides this
+
+    async def start_managed_process(self, session: BoxSessionInfo, spec):
+        raise BoxError(f'{self.name} backend does not support managed processes')
+
+    async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
+        """Remove lingering containers from previous runs. No-op by default."""
+
+
+class CLISandboxBackend(BaseSandboxBackend):
+    """Sandbox backend that drives a container engine through its command-line client."""
+    command: str
+
+    def __init__(self, logger: logging.Logger, command: str, backend_name: str):
+        super().__init__(logger)
+        self.command = command
+        self.name = backend_name
+
+    async def is_available(self) -> bool:
+        """Return True when the CLI is on PATH and ``<command> info`` exits 0 within 5 s."""
+        if shutil.which(self.command) is None:
+            return False
+        result = await self._run_command([self.command, 'info'], timeout_sec=5, check=False)
+        return result.return_code == 0 and not result.timed_out
+
+    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
+        """Start a detached container for *spec* and return its session record.
+
+        The container only runs an idle sleep loop; commands are run later via ``exec``.
+
+        Raises:
+            BoxError: if the ``run`` command exits non-zero or exceeds its 30 s timeout.
+        """
+        validate_sandbox_security(spec)
+
+        now = dt.datetime.now(dt.timezone.utc)
+        container_name = self._build_container_name(spec.session_id)
+
+        args = [self.command, 'run', '-d']
+        if not spec.persistent:
+            # Non-persistent containers are started with --rm.
+            args.append('--rm')
+
+        # The labels are what cleanup_orphaned_containers() filters and compares on.
+        args.extend(['--name', container_name, '--label', 'langbot.box=true'])
+        args.extend(['--label', f'langbot.session_id={spec.session_id}'])
+        args.extend(['--label', f'langbot.box.instance_id={self.instance_id}'])
+
+        if spec.network == BoxNetworkMode.OFF:
+            args.extend(['--network', 'none'])
+
+        # Resource limits
+        args.extend(['--cpus', str(spec.cpus)])
+        args.extend(['--memory', f'{spec.memory_mb}m'])
+        args.extend(['--pids-limit', str(spec.pids_limit)])
+
+        if spec.read_only_rootfs:
+            # A read-only root still gets a writable 64 MB tmpfs at /tmp.
+            args.append('--read-only')
+            args.extend(['--tmpfs', '/tmp:size=64m'])
+
+        if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE:
+            mount_spec = f'{spec.host_path}:{spec.mount_path}:{spec.host_path_mode.value}'
+            args.extend(['-v', mount_spec])
+
+        for mount in spec.extra_mounts:
+            if mount.mode != BoxHostMountMode.NONE:
+                args.extend(['-v', f'{mount.host_path}:{mount.mount_path}:{mount.mode.value}'])
+
+        # Idle loop keeps the container running so later exec calls have a target.
+        args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done'])
+
+        self.logger.info(
+            f'LangBot Box backend start_session: backend={self.name} '
+            f'session_id={spec.session_id} container_name={container_name} '
+            f'image={spec.image} network={spec.network.value} '
+            f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} '
+            f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} '
+            f'read_only_rootfs={spec.read_only_rootfs} workspace_quota_mb={spec.workspace_quota_mb}'
+        )
+
+        await self._run_command(args, timeout_sec=30, check=True)
+
+        return BoxSessionInfo(
+            session_id=spec.session_id,
+            backend_name=self.name,
+            backend_session_id=container_name,
+            image=spec.image,
+            network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
+            mount_path=spec.mount_path,
+            persistent=spec.persistent,
+            cpus=spec.cpus,
+            memory_mb=spec.memory_mb,
+            pids_limit=spec.pids_limit,
+            read_only_rootfs=spec.read_only_rootfs,
+            workspace_quota_mb=spec.workspace_quota_mb,
+            created_at=now,
+            last_used_at=now,
+        )
+
+    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
+        """Run ``spec.cmd`` inside the session's container via ``<command> exec``.
+
+        Returns a TIMED_OUT result (``exit_code=None``) when the CLI call exceeds
+        ``spec.timeout_sec``; otherwise COMPLETED with the command's exit code.
+        """
+        start = dt.datetime.now(dt.timezone.utc)
+        args = [self.command, 'exec']
+
+        for key, value in spec.env.items():
+            args.extend(['-e', f'{key}={value}'])
+
+        shell_cmd = self._build_exec_command(spec.workdir, spec.cmd)
+        args.extend([session.backend_session_id, 'sh', '-lc', shell_cmd])
+
+        # Only a bounded preview of the command and the env *keys* are logged, never env values.
+        cmd_preview = spec.cmd.strip()
+        if len(cmd_preview) > 400:
+            cmd_preview = f'{cmd_preview[:397]}...'
+        self.logger.info(
+            f'LangBot Box backend exec: backend={self.name} '
+            f'session_id={session.session_id} container_name={session.backend_session_id} '
+            f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} '
+            f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
+        )
+
+        result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False)
+        duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000)
+
+        if result.timed_out:
+            return BoxExecutionResult(
+                session_id=session.session_id,
+                backend_name=self.name,
+                status=BoxExecutionStatus.TIMED_OUT,
+                exit_code=None,
+                stdout=result.stdout,
+                stderr=result.stderr or f'Command timed out after {spec.timeout_sec} seconds.',
+                duration_ms=duration_ms,
+            )
+
+        return BoxExecutionResult(
+            session_id=session.session_id,
+            backend_name=self.name,
+            status=BoxExecutionStatus.COMPLETED,
+            exit_code=result.return_code,
+            stdout=result.stdout,
+            stderr=result.stderr,
+            duration_ms=duration_ms,
+        )
+
+    async def stop_session(self, session: BoxSessionInfo):
+        """Force-remove the session's container; failures of the ``rm`` call are ignored."""
+        self.logger.info(
+            f'LangBot Box backend stop_session: backend={self.name} '
+            f'session_id={session.session_id} container_name={session.backend_session_id}'
+        )
+
+        # check=False: a non-zero exit or a timeout of ``rm -f`` is not raised to the caller.
+        rm_args = [self.command, 'rm', '-f', session.backend_session_id]
+        await self._run_command(rm_args, timeout_sec=20, check=False)
+
+    async def is_session_alive(self, session: BoxSessionInfo) -> bool:
+        """Return True only when ``inspect`` reports the container as running.
+
+        A non-zero exit status (including the -1 used for timeouts) yields False.
+        """
+        result = await self._run_command(
+            [self.command, 'inspect', '-f', '{{.State.Running}}', session.backend_session_id],
+            timeout_sec=5,
+            check=False,
+        )
+        # The format template prints the container's State.Running flag;
+        # it is compared case-insensitively against 'true'.
+        return result.return_code == 0 and result.stdout.strip().lower() == 'true'
+
+    async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
+        """Remove langbot.box containers left behind by other instances.
+
+        A container is removed when its ``langbot.box.instance_id`` label differs
+        from *current_instance_id*. A missing label counts as ``''``, so unlabeled
+        containers (older versions) are removed unless *current_instance_id* is ``''``.
+        """
+        result = await self._run_command(
+            [
+                self.command,
+                'ps',
+                '-a',
+                '--filter',
+                'label=langbot.box=true',
+                '--format',
+                '{{.ID}}\t{{.Label "langbot.box.instance_id"}}',
+            ],
+            timeout_sec=10,
+            check=False,
+        )
+        if result.return_code != 0 or not result.stdout.strip():
+            return
+        orphan_ids = []
+        for line in result.stdout.strip().split('\n'):
+            line = line.strip()
+            if not line:
+                continue
+            parts = line.split('\t', 1)  # "<container id>\t<instance label>"
+            cid = parts[0].strip()
+            label_instance = parts[1].strip() if len(parts) > 1 else ''
+            if label_instance != current_instance_id:
+                orphan_ids.append(cid)
+        if not orphan_ids:
+            return
+        for cid in orphan_ids:
+            self.logger.info(f'Cleaning up orphaned Box container: {cid}')
+        await self._run_command(
+            [self.command, 'rm', '-f', *orphan_ids],
+            timeout_sec=30,
+            check=False,
+        )
+
+    async def start_managed_process(self, session: BoxSessionInfo, spec) -> asyncio.subprocess.Process:
+        """Spawn ``spec.command`` inside the container with piped stdin/stdout/stderr.
+
+        Returns the local ``<command> exec -i`` subprocess. This method neither
+        tracks nor awaits it, so the caller must read its pipes and terminate it.
+        """
+        # ``-i`` keeps the exec'd process's stdin open so the caller can write to it.
+        args = [self.command, 'exec', '-i']
+
+        for key, value in spec.env.items():
+            args.extend(['-e', f'{key}={value}'])
+
+        shell_cmd = self._build_spawn_command(spec.cwd, spec.command, spec.args)
+        args.extend([session.backend_session_id, 'sh', '-lc', shell_cmd])
+
+        self.logger.info(
+            f'LangBot Box backend start_managed_process: backend={self.name} '
+            f'session_id={session.session_id} container_name={session.backend_session_id} '
+            f'cwd={spec.cwd} env_keys={sorted(spec.env.keys())} command={spec.command} args={spec.args}'
+        )
+
+        return await asyncio.create_subprocess_exec(
+            *args,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+    def _build_container_name(self, session_id: str) -> str:
+        """Return ``langbot-box-<sanitized id, max 32 chars>-<8 random hex chars>``."""
+        normalized = re.sub(r'[^a-zA-Z0-9_.-]+', '-', session_id).strip('-').lower() or 'session'
+        return f'langbot-box-{normalized[:32]}-{uuid.uuid4().hex[:8]}'
+
+    def _build_exec_command(self, workdir: str, cmd: str) -> str:
+        quoted_workdir = shlex.quote(workdir)  # only the directory is quoted; cmd is a shell snippet
+        return f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {cmd}'
+
+    def _build_spawn_command(self, cwd: str, command: str, args: list[str]) -> str:
+        """Build the shell line: create *cwd*, cd into it, then ``exec`` the quoted argv."""
+        quoted_cwd = shlex.quote(cwd)
+        return f'mkdir -p {quoted_cwd} && cd {quoted_cwd} && exec {shlex.join([command, *args])}'
+
+    async def _run_command(
+        self,
+        args: list[str],
+        timeout_sec: int,
+        check: bool,
+    ) -> _CommandResult:
+        """Run a CLI command and capture its clipped stdout/stderr.
+
+        On timeout the process is killed and a result with ``timed_out=True`` and
+        ``return_code=-1`` is returned. With *check* set, a timeout or a non-zero
+        exit raises ``BoxError`` instead of returning.
+        """
+        process = await asyncio.create_subprocess_exec(
+            *args,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        # Both pipes are drained by background tasks while we wait for the exit.
+        stdout_task = asyncio.create_task(self._read_stream(process.stdout))
+        stderr_task = asyncio.create_task(self._read_stream(process.stderr))
+
+        timed_out = False
+        try:
+            await asyncio.wait_for(process.wait(), timeout=timeout_sec)
+        except asyncio.TimeoutError:
+            timed_out = True
+            try:
+                process.kill()
+            except ProcessLookupError:
+                pass  # the process exited between the timeout firing and the kill
+            await process.wait()
+
+        stdout_bytes, stdout_total = await stdout_task
+        stderr_bytes, stderr_total = await stderr_task
+        stdout = self._clip_captured_bytes(stdout_bytes, stdout_total)
+        stderr = self._clip_captured_bytes(stderr_bytes, stderr_total)
+
+        # A checked command that was killed for exceeding its timeout did not succeed.
+        if check and timed_out:
+            raise BoxError(self._format_cli_error(f'command timed out after {timeout_sec} seconds'))
+        if check and process.returncode != 0:
+            raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error'))
+
+        return_code = -1 if timed_out else process.returncode
+        return _CommandResult(return_code=return_code, stdout=stdout, stderr=stderr, timed_out=timed_out)
+
+    @staticmethod
+    def _clip_captured_bytes(data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
+        """Decode captured bytes and append a notice when output beyond *limit* was discarded."""
+        text = data.decode('utf-8', errors='replace').strip()
+        notice = f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]'
+        return text + notice if total_size > limit else text
+
+    @staticmethod
+    async def _read_stream(
+        stream: asyncio.StreamReader | None,
+        limit: int = _MAX_RAW_OUTPUT_BYTES,
+    ) -> tuple[bytes, int]:
+        """Read *stream* to EOF, keeping at most *limit* bytes.
+
+        Returns ``(kept_bytes, total_bytes_seen)``. Data past the limit is still
+        read from the stream but discarded.
+        """
+        if stream is None:
+            return b'', 0
+        chunks = bytearray()
+        total_size = 0
+        while chunk := await stream.read(65536):
+            total_size += len(chunk)
+            remaining = limit - len(chunks)
+            if remaining > 0:
+                chunks.extend(chunk[:remaining])
+        return bytes(chunks), total_size
+
+    def _format_cli_error(self, message: str) -> str:
+        """Collapse whitespace, cap the text at 300 characters and prefix the backend name."""
+        message = ' '.join(message.split())
+        message = message if len(message) <= 300 else f'{message[:297]}...'
+        return f'{self.name} backend error: {message}'
+
+
+class DockerBackend(CLISandboxBackend):  # CLI backend bound to the ``docker`` executable
+    def __init__(self, logger: logging.Logger):
+        super().__init__(logger, 'docker', 'docker')  # (logger, command, backend_name)
diff --git a/src/langbot_plugin/box/client.py b/src/langbot_plugin/box/client.py
new file mode 100644
index 0000000..dc3f78d
--- /dev/null
+++ b/src/langbot_plugin/box/client.py
@@ -0,0 +1,377 @@
+"""BoxRuntimeClient abstraction for Box Runtime access."""
+
+from __future__ import annotations
+
+import abc
+import logging
+from typing import Any
+
+from langbot_plugin.runtime.io.handler import Handler
+
+from .actions import LangBotToBoxAction
+from .errors import BoxError, BoxRuntimeUnavailableError
+from .models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxManagedProcessInfo,
+    BoxManagedProcessSpec,
+    BoxSpec,
+)
+
+
+class BoxRuntimeClient(abc.ABC):
+    """Abstract interface that BoxService uses to talk to a Box Runtime."""
+
+    @abc.abstractmethod
+    async def initialize(self) -> None: ...  # abstract: concrete clients must implement
+
+    @abc.abstractmethod
+    async def execute(self, spec: BoxSpec) -> BoxExecutionResult: ...  # run the command described by spec
+
+    @abc.abstractmethod
+    async def shutdown(self) -> None: ...
+
+    @abc.abstractmethod
+    async def get_status(self) -> dict: ...
+
+    @abc.abstractmethod
+    async def get_sessions(self) -> list[dict]: ...
+
+    @abc.abstractmethod
+    async def get_backend_info(self) -> dict: ...
+
+    @abc.abstractmethod
+    async def delete_session(self, session_id: str) -> None: ...
+
+    @abc.abstractmethod
+    async def create_session(self, spec: BoxSpec) -> dict: ...
+
+    @abc.abstractmethod
+    async def start_managed_process(
+        self, session_id: str, spec: BoxManagedProcessSpec
+    ) -> BoxManagedProcessInfo: ...
+
+    @abc.abstractmethod
+    async def get_managed_process(
+        self, session_id: str, process_id: str = "default"
+    ) -> BoxManagedProcessInfo: ...
+
+    @abc.abstractmethod
+    async def stop_managed_process(
+        self, session_id: str, process_id: str = "default"
+    ) -> None: ...
+
+    @abc.abstractmethod
+    async def get_session(self, session_id: str) -> dict: ...
+
+    @abc.abstractmethod
+    async def init(self, config: dict) -> None: ...  # distinct from initialize(): takes a config dict
+
+    # The skill-management methods below are NOT abstract: a subclass may leave
+    # them out, in which case calling them raises NotImplementedError.
+    async def list_skills(self) -> list[dict]:
+        raise NotImplementedError
+
+    async def get_skill(self, name: str) -> dict | None:
+        raise NotImplementedError
+
+    async def create_skill(self, skill: dict) -> dict:
+        raise NotImplementedError
+
+    async def update_skill(self, name: str, skill: dict) -> dict:
+        raise NotImplementedError
+
+    async def delete_skill(self, name: str) -> None:
+        raise NotImplementedError
+
+    async def scan_skill_directory(self, path: str) -> dict:
+        raise NotImplementedError
+
+    async def list_skill_files(
+        self,
+        name: str,
+        path: str = ".",
+        include_hidden: bool = False,
+        max_entries: int = 200,
+    ) -> dict:
+        raise NotImplementedError
+
+    async def read_skill_file(self, name: str, path: str) -> dict:
+        raise NotImplementedError
+
+    async def write_skill_file(self, name: str, path: str, content: str) -> dict:
+        raise NotImplementedError
+
+    async def preview_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_subdir: str = "",
+        target_suffix: str = "upload",
+    ) -> list[dict]:
+        raise NotImplementedError
+
+    async def install_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_paths: list[str] | None = None,
+        source_path: str = "",
+        source_subdir: str = "",
+        target_suffix: str = "upload",
+    ) -> list[dict]:
+        raise NotImplementedError
+
+
+def _translate_action_error(exc: Exception) -> BoxError:
+    """Rebuild the typed ``BoxError`` for a failed action call from its message text.
+
+    The first ``"<ClassName>:"`` marker found in ``str(exc)`` picks the subclass; default ``BoxError``."""
+    from .errors import (
+        BoxBackendUnavailableError,
+        BoxManagedProcessConflictError,
+        BoxManagedProcessNotFoundError,
+        BoxSessionConflictError,
+        BoxSessionNotFoundError,
+        BoxValidationError,
+    )
+
+    text = str(exc)
+    markers: dict[str, type[BoxError]] = {
+        "BoxValidationError:": BoxValidationError,
+        "BoxSessionNotFoundError:": BoxSessionNotFoundError,
+        "BoxSessionConflictError:": BoxSessionConflictError,
+        "BoxManagedProcessNotFoundError:": BoxManagedProcessNotFoundError,
+        "BoxManagedProcessConflictError:": BoxManagedProcessConflictError,
+        "BoxBackendUnavailableError:": BoxBackendUnavailableError,
+    }
+    matched = next((cls for marker, cls in markers.items() if marker in text), BoxError)
+    return matched(text)
+
+
+class ActionRPCBoxClient(BoxRuntimeClient):
+    """Client that talks to BoxRuntime via the action RPC protocol."""
+
+    def __init__(self, logger: logging.Logger):
+        self._logger = logger
+        self._handler: Handler | None = None  # attached later through set_handler()
+
+    @property
+    def handler(self) -> Handler:
+        if self._handler is None:  # no handler attached yet, or shutdown() already ran
+            raise BoxRuntimeUnavailableError("box runtime not connected")
+        return self._handler
+
+    def set_handler(self, handler: Handler) -> None:
+        self._handler = handler
+
+    async def _call(
+        self, action: LangBotToBoxAction, data: dict[str, Any], timeout: float = 15.0
+    ) -> dict[str, Any]:
+        try:
+            return await self.handler.call_action(action, data, timeout=timeout)
+        except BoxRuntimeUnavailableError:  # "not connected" is surfaced unchanged
+            raise
+        except Exception as exc:  # everything else is mapped onto the BoxError hierarchy
+            raise _translate_action_error(exc) from exc
+
+    async def initialize(self) -> None:
+        try:  # a successful HEALTH round-trip is the connectivity check
+            await self._call(LangBotToBoxAction.HEALTH, {})
+            self._logger.info("LangBot Box runtime connected via action RPC.")
+        except Exception as exc:
+            raise BoxRuntimeUnavailableError(f"box runtime unavailable: {exc}") from exc
+
+    async def execute(self, spec: BoxSpec) -> BoxExecutionResult:
+        """Run *spec* on the runtime (300 s RPC timeout) and rebuild the result object."""
+        payload = spec.model_dump(mode="json")
+        data = await self._call(LangBotToBoxAction.EXEC, payload, timeout=300.0)
+        return BoxExecutionResult(
+            session_id=data["session_id"],
+            backend_name=data["backend_name"],
+            status=BoxExecutionStatus(data["status"]),
+            exit_code=data.get("exit_code"),
+            stdout=data.get("stdout", ""),
+            stderr=data.get("stderr", ""),
+            duration_ms=data["duration_ms"],
+        )
+
+    async def shutdown(self) -> None:
+        if self._handler is not None:
+            try:
+                await self._call(LangBotToBoxAction.SHUTDOWN, {})
+            except Exception as exc:  # best effort: never fail shutdown, but leave a trace
+                self._logger.debug("Box runtime shutdown request failed: %s", exc)
+            self._handler = None
+
+    async def get_status(self) -> dict:
+        return await self._call(LangBotToBoxAction.STATUS, {})
+
+    async def get_sessions(self) -> list[dict]:
+        data = await self._call(LangBotToBoxAction.GET_SESSIONS, {})
+        return data["sessions"]
+
+    async def get_session(self, session_id: str) -> dict:
+        """Return the runtime's reply for a GET_SESSION request on *session_id*."""
+        payload = {"session_id": session_id}
+        return await self._call(LangBotToBoxAction.GET_SESSION, payload)
+
+    async def get_backend_info(self) -> dict:
+        return await self._call(LangBotToBoxAction.GET_BACKEND_INFO, {})
+
+    async def delete_session(self, session_id: str) -> None:
+        """Request deletion of *session_id* (30 s RPC timeout)."""
+        payload = {"session_id": session_id}
+        await self._call(LangBotToBoxAction.DELETE_SESSION, payload, timeout=30.0)
+
+    async def create_session(self, spec: BoxSpec) -> dict:
+        """Request a new session for *spec* and return the runtime's reply."""
+        payload = spec.model_dump(mode="json")
+        return await self._call(LangBotToBoxAction.CREATE_SESSION, payload)
+
+    async def start_managed_process(
+        self, session_id: str, spec: BoxManagedProcessSpec
+    ) -> BoxManagedProcessInfo:
+        """Start a managed process in *session_id* and return its info."""
+        payload = {"session_id": session_id, "spec": spec.model_dump(mode="json")}
+        data = await self._call(LangBotToBoxAction.START_MANAGED_PROCESS, payload)
+        # The reply dict is validated into the typed model before it is returned.
+        return BoxManagedProcessInfo.model_validate(data)
+
+    async def get_managed_process(
+        self, session_id: str, process_id: str = "default"
+    ) -> BoxManagedProcessInfo:
+        """Fetch the runtime's current record of a managed process.
+
+        *process_id* selects the process within the session; the reply is
+        validated into ``BoxManagedProcessInfo``.
+        """
+        payload = {"session_id": session_id, "process_id": process_id}
+        data = await self._call(LangBotToBoxAction.GET_MANAGED_PROCESS, payload)
+        return BoxManagedProcessInfo.model_validate(data)
+
+    async def stop_managed_process(
+        self, session_id: str, process_id: str = "default"
+    ) -> None:
+        """Ask the runtime to stop a managed process.
+
+        Uses a 30 s RPC timeout instead of the 15 s default.
+        """
+        payload = {"session_id": session_id, "process_id": process_id}
+        await self._call(
+            LangBotToBoxAction.STOP_MANAGED_PROCESS, payload, timeout=30.0
+        )
+
+    def get_managed_process_websocket_url(
+        self, session_id: str, ws_relay_base_url: str, process_id: str = "default"
+    ) -> str:
+        """Build the WebSocket URL of a managed process from the relay's base URL.
+
+        ``https://`` maps to ``wss://``; ``http://`` or a bare host maps to ``ws://``.
+        Trailing slashes on the base are dropped so the path never contains ``//``.
+        """
+        base = ws_relay_base_url.rstrip("/")
+        if base.startswith("https://"):
+            scheme, suffix = "wss://", base.removeprefix("https://")
+        else:
+            scheme, suffix = "ws://", base.removeprefix("http://")
+        return (
+            f"{scheme}{suffix}/v1/sessions/{session_id}/managed-process/{process_id}/ws"
+        )
+
+    async def init(self, config: dict) -> None:
+        await self._call(LangBotToBoxAction.INIT, config)
+
+    async def list_skills(self) -> list[dict]:
+        data = await self._call(LangBotToBoxAction.LIST_SKILLS, {})
+        return data["skills"]
+
+    async def get_skill(self, name: str) -> dict | None:
+        data = await self._call(LangBotToBoxAction.GET_SKILL, {"name": name})
+        return data.get("skill")  # None when the reply carries no "skill" key
+
+    async def create_skill(self, skill: dict) -> dict:
+        data = await self._call(LangBotToBoxAction.CREATE_SKILL, {"skill": skill})
+        return data["skill"]
+
+    async def update_skill(self, name: str, skill: dict) -> dict:
+        """Send an UPDATE_SKILL request for *name* and return the reply's ``"skill"`` entry."""
+        payload = {"name": name, "skill": skill}
+        data = await self._call(LangBotToBoxAction.UPDATE_SKILL, payload)
+        return data["skill"]
+
+    async def delete_skill(self, name: str) -> None:
+        await self._call(LangBotToBoxAction.DELETE_SKILL, {"name": name})
+
+    async def scan_skill_directory(self, path: str) -> dict:
+        return await self._call(LangBotToBoxAction.SCAN_SKILL_DIRECTORY, {"path": path})
+
+    async def list_skill_files(
+        self,
+        name: str,
+        path: str = ".",
+        include_hidden: bool = False,
+        max_entries: int = 200,
+    ) -> dict:
+        """Send a LIST_SKILL_FILES request for *path* of skill *name*; return the reply."""
+        # NOTE(review): include_hidden / max_entries are interpreted by the runtime, not here.
+        payload = {
+            "name": name,
+            "path": path,
+            "include_hidden": include_hidden,
+            "max_entries": max_entries,
+        }
+        return await self._call(LangBotToBoxAction.LIST_SKILL_FILES, payload)
+
+    async def read_skill_file(self, name: str, path: str) -> dict:
+        """Send a READ_SKILL_FILE request for *path* of skill *name*; return the reply."""
+        payload = {"name": name, "path": path}
+        return await self._call(LangBotToBoxAction.READ_SKILL_FILE, payload)
+
+    async def write_skill_file(self, name: str, path: str, content: str) -> dict:
+        """Send a WRITE_SKILL_FILE request with *content* for *path* of skill *name*."""
+        # The content travels as text inside the action payload.
+        payload = {"name": name, "path": path, "content": content}
+        return await self._call(LangBotToBoxAction.WRITE_SKILL_FILE, payload)
+
+    async def preview_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_subdir: str = "",
+        target_suffix: str = "upload",
+    ) -> list[dict]:
+        """Upload a ZIP, send a PREVIEW_SKILL_ZIP request, return the reply's ``"skills"``."""
+        # The archive is uploaded first; the action payload only carries the
+        # returned file key. Upload errors are not translated by _call().
+        file_key = await self.handler.send_file(file_bytes, "zip")
+        payload = {
+            "file_key": file_key,
+            "filename": filename,
+            "source_subdir": source_subdir,
+            "target_suffix": target_suffix,
+        }
+        data = await self._call(LangBotToBoxAction.PREVIEW_SKILL_ZIP, payload, timeout=60.0)
+        return data["skills"]
+
+    async def install_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_paths: list[str] | None = None,
+        source_path: str = "",
+        source_subdir: str = "",
+        target_suffix: str = "upload",
+    ) -> list[dict]:
+        """Upload a ZIP, send an INSTALL_SKILL_ZIP request, return the reply's ``"skills"``."""
+        # The archive is uploaded first; the action payload only carries the
+        # returned file key. A missing source_paths is sent as an empty list.
+        file_key = await self.handler.send_file(file_bytes, "zip")
+        payload = {
+            "file_key": file_key,
+            "filename": filename,
+            "source_paths": source_paths or [],
+            "source_path": source_path,
+            "source_subdir": source_subdir,
+            "target_suffix": target_suffix,
+        }
+        data = await self._call(LangBotToBoxAction.INSTALL_SKILL_ZIP, payload, timeout=120.0)
+        return data["skills"]
diff --git a/src/langbot_plugin/box/e2b_backend.py b/src/langbot_plugin/box/e2b_backend.py
new file mode 100644
index 0000000..46ee031
--- /dev/null
+++ b/src/langbot_plugin/box/e2b_backend.py
@@ -0,0 +1,429 @@
+from __future__ import annotations
+
+import datetime as dt
+import json
+import logging
+import os
+import posixpath
+import shlex
+
+from .backend import BaseSandboxBackend, _MAX_RAW_OUTPUT_BYTES
+from .errors import BoxError
+from .models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
+from .security import validate_sandbox_security
+
+# E2B sandboxes use /home/user as the default writable directory, so the
+# logical /workspace path is mapped to /home/user/workspace for compatibility.
+E2B_DEFAULT_WORKDIR = '/home/user'
+E2B_WORKSPACE_DIR = '/home/user/workspace'
+
+# Lazy-import state for the optional e2b package. It is filled in by
+# _check_e2b_available() and cleared by _reset_e2b_cache().
+# _e2b_available stays None until the first availability check has run.
+_e2b_available: bool | None = None
+_AsyncSandbox = None
+_CommandResult = None
+
+
+def _check_e2b_available(force: bool = False) -> bool:
+    """Report whether the ``e2b`` package can be imported.
+
+    The outcome is remembered in module globals. On a successful import the
+    ``AsyncSandbox`` and ``CommandResult`` classes are stored alongside it.
+
+    Args:
+        force: Ignore a remembered outcome and attempt the import again.
+
+    Returns:
+        True when the import succeeded, False when it raised ImportError.
+    """
+    global _e2b_available, _AsyncSandbox, _CommandResult
+    remembered = _e2b_available
+    if remembered is not None and not force:
+        return remembered
+
+    try:
+        from e2b import AsyncSandbox, CommandResult
+    except ImportError:
+        _e2b_available = False
+    else:
+        _AsyncSandbox = AsyncSandbox
+        _CommandResult = CommandResult
+        _e2b_available = True
+
+    return _e2b_available
+
+
+def _reset_e2b_cache() -> None:
+    """Forget the remembered e2b import outcome so the next check imports again."""
+    global _e2b_available, _AsyncSandbox, _CommandResult
+    _e2b_available = _AsyncSandbox = _CommandResult = None
+
+
+def _adapt_path_for_e2b(path: str) -> str:
+    """Translate a logical sandbox path into its location inside E2B.
+
+    ``/workspace`` itself and anything below it is re-rooted under
+    ``E2B_WORKSPACE_DIR``; every other path is returned unchanged.
+    """
+    logical_root = '/workspace'
+    if path == logical_root:
+        return E2B_WORKSPACE_DIR
+    if path.startswith(logical_root + '/'):
+        # Keep everything after the logical root, including its leading slash.
+        return E2B_WORKSPACE_DIR + path[len(logical_root):]
+    return path
+
+
+def _rewrite_command_paths_for_e2b(command: str) -> str:
+    """Rewrite LangBot's logical /workspace paths for E2B's real writable path.
+
+    Only occurrences where ``/workspace`` is a whole, leading path component
+    are rewritten. A plain substring replacement would also corrupt paths that
+    merely contain the text, such as the already-translated
+    ``/home/user/workspace``, ``/workspaces`` or ``/data/workspace``.
+
+    Known limitation: an occurrence glued directly to a word character (for
+    example ``-I/workspace``) cannot be told apart from a relative path such
+    as ``src/workspace`` and is left unchanged.
+
+    Args:
+        command: Shell command text that may mention ``/workspace``.
+
+    Returns:
+        The command with each stand-alone ``/workspace`` prefix replaced by
+        ``E2B_WORKSPACE_DIR``.
+    """
+    import re  # local import so this function does not depend on file-level imports
+
+    return re.sub(
+        # Not preceded by a character that would make it part of a longer
+        # path, and not followed by one that would extend the component name.
+        r'(?<![\w.~}/-])/workspace(?![\w.-])',
+        # A callable avoids backslash/group processing of the replacement text.
+        lambda _match: E2B_WORKSPACE_DIR,
+        command,
+    )
+
+
+class E2BSandboxBackend(BaseSandboxBackend):
+    """E2B/CubeSandbox sandbox backend.
+
+    Supports both E2B cloud service and self-hosted CubeSandbox.
+    Configuration sources (priority from high to low):
+    1. Environment variables: E2B_API_KEY, E2B_API_URL
+    2. Configuration passed via configure() method (from LangBot config.yaml)
+    """
+
+    name = 'e2b'
+
+    def __init__(self, logger: logging.Logger):
+        """Create an unconfigured backend; settings are resolved in ``initialize``."""
+        super().__init__(logger)
+        # Raw settings handed over by configure(); empty until then.
+        self._config_from_langbot: dict = {}
+        # Filled in by initialize().
+        self._template: str | None = None
+        self._api_url: str | None = None
+        self._api_key: str | None = None
+
+    def configure(self, config: dict) -> None:
+        """Remember the settings coming from LangBot's config.yaml.
+
+        The dict is stored as-is; ``initialize`` later applies environment
+        variables on top of it. The remembered e2b import outcome is cleared
+        as well, so a package installed later is detected on the next check.
+        """
+        _reset_e2b_cache()
+        self._config_from_langbot = config
+
+    async def initialize(self):
+        """Resolve the API key, API URL and template used by later calls.
+
+        ``E2B_API_KEY`` and ``E2B_API_URL`` win over the ``api_key`` and
+        ``api_url`` entries of the configured dict when they are set to a
+        non-empty value. The template is read from the configured dict only.
+        """
+        settings = self._config_from_langbot
+        env_key = os.getenv('E2B_API_KEY')
+        env_url = os.getenv('E2B_API_URL')
+        self._api_key = env_key if env_key else settings.get('api_key')
+        self._api_url = env_url if env_url else settings.get('api_url')
+        self._template = settings.get('template')
+
+    async def is_available(self) -> bool:
+        """Check if the E2B backend can be used.
+
+        Returns True only if:
+        1. the e2b package is importable, and
+        2. an API key is present on this instance (``self._api_key``, which
+           ``initialize`` fills from ``E2B_API_KEY`` or the configured dict).
+        """
+        if not _check_e2b_available():
+            self.logger.info('e2b package not installed')
+            return False
+
+        has_key = bool(self._api_key)
+        if not has_key:
+            self.logger.info('E2B_API_KEY not set')
+        return has_key
+
+    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
+        """Create a new E2B sandbox session.
+
+        Maps BoxSpec fields to AsyncSandbox.create() parameters:
+        - template: spec.image unless it is the stock LangBot image, otherwise
+          the configured template, otherwise omitted (SDK default)
+        - envs: spec.env
+        - api_key / domain: the values resolved by initialize()
+        - metadata: CubeSandbox host-mount configuration
+
+        No sandbox lifetime timeout is passed to create(). spec.network and
+        the resource limits on the spec are not sent to E2B either; they are
+        only logged and copied into the returned session info.
+
+        Args:
+            spec: Requested sandbox configuration. It is passed through
+                validate_sandbox_security() before anything else happens.
+
+        Returns:
+            Session info whose backend_session_id is the E2B sandbox id.
+
+        Raises:
+            BoxError: If the e2b package is not installed or the sandbox
+                cannot be created.
+        """
+        validate_sandbox_security(spec)
+
+        if not _check_e2b_available():
+            raise BoxError('e2b package not installed')
+
+        now = dt.datetime.now(dt.timezone.utc)
+
+        # Adapt the mount path for the E2B environment
+        mount_path = _adapt_path_for_e2b(spec.mount_path)
+
+        # Build create parameters
+        create_kwargs = {}
+
+        # Template - use spec.image if provided, otherwise configured template, otherwise E2B default
+        if spec.image and spec.image != 'rockchin/langbot-sandbox:latest':
+            create_kwargs['template'] = spec.image
+        elif self._template:
+            create_kwargs['template'] = self._template
+
+        # Environment variables
+        if spec.env:
+            create_kwargs['envs'] = spec.env
+
+        # API key and domain (for CubeSandbox self-deployment)
+        if self._api_key:
+            create_kwargs['api_key'] = self._api_key
+        if self._api_url:
+            # E2B SDK uses 'domain' for self-hosted API URL
+            create_kwargs['domain'] = self._api_url
+
+        # Build metadata for CubeSandbox host-mount
+        metadata = {}
+        if spec.host_path and spec.host_path_mode != BoxHostMountMode.NONE:
+            metadata['host-mount'] = json.dumps([{
+                'hostPath': spec.host_path,
+                'mountPath': mount_path,
+                'readOnly': spec.host_path_mode == BoxHostMountMode.READ_ONLY,
+            }])
+        if metadata:
+            create_kwargs['metadata'] = metadata
+
+        # Network mode is not forwarded; network access is left to the template.
+        # NOTE(review): newer E2B SDKs appear to accept allow_internet_access
+        # in create() - confirm and consider honouring spec.network here.
+
+        self.logger.info(
+            f'LangBot Box backend start_session: backend=e2b '
+            f'session_id={spec.session_id} '
+            f'template={create_kwargs.get("template", "default")} '
+            f'network={spec.network.value} '
+            f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={mount_path} '
+            f'env_keys={sorted(spec.env.keys())}'
+        )
+
+        try:
+            sandbox = await _AsyncSandbox.create(**create_kwargs)
+        except Exception as exc:
+            # Chain the cause so the SDK traceback is kept for debugging.
+            raise BoxError(f'Failed to create E2B sandbox: {exc}') from exc
+
+        return BoxSessionInfo(
+            session_id=spec.session_id,
+            backend_name=self.name,
+            backend_session_id=sandbox.sandbox_id,
+            image=spec.image,
+            network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
+            # Keep the logical mount path in session metadata. The runtime
+            # compares future BoxSpec objects against this value when reusing
+            # sessions; storing the E2B-internal path here makes every later
+            # /workspace request look incompatible.
+            mount_path=spec.mount_path,
+            persistent=spec.persistent,
+            cpus=spec.cpus,
+            memory_mb=spec.memory_mb,
+            pids_limit=spec.pids_limit,
+            read_only_rootfs=spec.read_only_rootfs,
+            workspace_quota_mb=spec.workspace_quota_mb,
+            created_at=now,
+            last_used_at=now,
+        )
+
+    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
+        """Execute a command in the E2B sandbox.
+
+        Reconnects to the existing sandbox via AsyncSandbox.connect(), uploads
+        the logical mounts, runs the command from the (created if missing)
+        working directory and downloads writable mounts afterwards.
+
+        Args:
+            session: Session whose backend_session_id names the sandbox.
+            spec: Command, working directory, timeout, env and mounts to use.
+
+        Returns:
+            A COMPLETED result carrying the command's exit code (zero or not)
+            and clipped output, or a TIMED_OUT result when the SDK reports a
+            timeout.
+
+        Raises:
+            BoxError: If the e2b package is missing, the sandbox cannot be
+                reached, or the command fails for a reason other than a
+                non-zero exit status or a timeout.
+        """
+        if not _check_e2b_available():
+            raise BoxError('e2b package not installed')
+
+        start = dt.datetime.now(dt.timezone.utc)
+
+        def elapsed_ms() -> int:
+            """Milliseconds elapsed since this exec call started."""
+            return int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000)
+
+        # Connect kwargs
+        connect_kwargs = {}
+        if self._api_key:
+            connect_kwargs['api_key'] = self._api_key
+        if self._api_url:
+            connect_kwargs['domain'] = self._api_url
+
+        # Adapt workdir and logical /workspace command paths for E2B.
+        workdir = _adapt_path_for_e2b(spec.workdir)
+        command = _rewrite_command_paths_for_e2b(spec.cmd)
+
+        cmd_preview = spec.cmd.strip()
+        if len(cmd_preview) > 400:
+            cmd_preview = f'{cmd_preview[:397]}...'
+        self.logger.info(
+            f'LangBot Box backend exec: backend=e2b '
+            f'session_id={session.session_id} sandbox_id={session.backend_session_id} '
+            f'workdir={workdir} timeout_sec={spec.timeout_sec} '
+            f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
+        )
+
+        try:
+            sandbox = await _AsyncSandbox.connect(
+                sandbox_id=session.backend_session_id,
+                **connect_kwargs
+            )
+        except Exception as exc:
+            raise BoxError(f'Failed to connect to E2B sandbox: {exc}') from exc
+
+        await self._sync_mounts_to_e2b(sandbox, spec)
+
+        # Run the command
+        # Note: E2B requires cwd to exist before running command. We create it
+        # as part of the command and then run from that directory.
+        run_kwargs = {
+            'cmd': f'mkdir -p {shlex.quote(workdir)} && cd {shlex.quote(workdir)} && {command}',
+            'timeout': spec.timeout_sec,
+        }
+        if spec.env:
+            run_kwargs['envs'] = spec.env
+
+        try:
+            result = await sandbox.commands.run(**run_kwargs)
+        except Exception as exc:
+            # The E2B SDK signals a non-zero exit status by raising an
+            # exception that itself carries exit_code/stdout/stderr. That is a
+            # finished command, not a backend failure, so it is reported as a
+            # normal result. This check comes first so that a failing command
+            # whose stderr mentions "timeout" is not mistaken for a timeout.
+            if getattr(exc, 'exit_code', None) is None:
+                error_msg = str(exc).lower()
+                if 'timeout' in error_msg or 'timed out' in error_msg:
+                    return BoxExecutionResult(
+                        session_id=session.session_id,
+                        backend_name=self.name,
+                        status=BoxExecutionStatus.TIMED_OUT,
+                        exit_code=None,
+                        stdout='',
+                        stderr=f'Command timed out after {spec.timeout_sec} seconds.',
+                        duration_ms=elapsed_ms(),
+                    )
+                raise BoxError(f'E2B command execution failed: {exc}') from exc
+            result = exc
+
+        # Writable mounts are downloaded even when the command exited non-zero,
+        # since it may still have produced files.
+        await self._sync_mounts_from_e2b(sandbox, spec)
+
+        duration_ms = elapsed_ms()
+
+        # Process output - apply truncation if needed
+        stdout = self._truncate_output(getattr(result, 'stdout', '') or '')
+        stderr = self._truncate_output(getattr(result, 'stderr', '') or '')
+
+        return BoxExecutionResult(
+            session_id=session.session_id,
+            backend_name=self.name,
+            status=BoxExecutionStatus.COMPLETED,
+            exit_code=result.exit_code,
+            stdout=stdout,
+            stderr=stderr,
+            duration_ms=duration_ms,
+        )
+
+    async def _sync_mounts_to_e2b(self, sandbox, spec: BoxSpec) -> None:
+        """Upload every enabled logical mount of ``spec`` into the sandbox.
+
+        The primary host mount goes first, followed by the extra mounts in
+        their listed order. Mounts whose mode is NONE are skipped.
+        """
+        uploads: list[tuple[str, str]] = []
+        if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE:
+            uploads.append((spec.host_path, spec.mount_path))
+        uploads.extend(
+            (extra.host_path, extra.mount_path)
+            for extra in spec.extra_mounts
+            if extra.mode != BoxHostMountMode.NONE
+        )
+
+        for host_dir, logical_path in uploads:
+            await self._sync_host_tree_to_e2b(
+                sandbox,
+                host_root=host_dir,
+                remote_root=_adapt_path_for_e2b(logical_path),
+            )
+
+    async def _sync_mounts_from_e2b(self, sandbox, spec: BoxSpec) -> None:
+        """Download every read-write mount of ``spec`` back to its host path.
+
+        The primary host mount goes first, followed by the extra mounts in
+        their listed order. Only READ_WRITE mounts are copied back.
+        """
+        downloads: list[tuple[str, str]] = []
+        if spec.host_path is not None and spec.host_path_mode == BoxHostMountMode.READ_WRITE:
+            downloads.append((spec.mount_path, spec.host_path))
+        downloads.extend(
+            (extra.mount_path, extra.host_path)
+            for extra in spec.extra_mounts
+            if extra.mode == BoxHostMountMode.READ_WRITE
+        )
+
+        for logical_path, host_dir in downloads:
+            await self._sync_e2b_tree_to_host(
+                sandbox,
+                remote_root=_adapt_path_for_e2b(logical_path),
+                host_root=host_dir,
+            )
+
+    async def _sync_host_tree_to_e2b(self, sandbox, *, host_root: str, remote_root: str) -> None:
+        """Copy a host directory tree into the sandbox, file by file.
+
+        Used in place of a bind mount. Does nothing when ``host_root`` is not
+        a directory. ``.git``, ``__pycache__``, ``.venv`` and ``node_modules``
+        are not descended into, and files larger than
+        ``_MAX_RAW_OUTPUT_BYTES`` are skipped. Failures are logged at debug
+        level and never raised.
+        """
+        if not os.path.isdir(host_root):
+            return
+
+        skipped_dirs = ('.git', '__pycache__', '.venv', 'node_modules')
+        for current_dir, subdirs, filenames in os.walk(host_root):
+            # Prune in place so os.walk does not descend into skipped directories.
+            subdirs[:] = [entry for entry in subdirs if entry not in skipped_dirs]
+
+            relative = os.path.relpath(current_dir, host_root)
+            if relative == '.':
+                target_dir = remote_root
+            else:
+                target_dir = posixpath.join(remote_root, relative.replace(os.sep, '/'))
+
+            try:
+                await sandbox.commands.run(f'mkdir -p {shlex.quote(target_dir)}', timeout=10)
+            except Exception as exc:
+                self.logger.debug(f'Failed to create E2B sync dir {target_dir}: {exc}')
+                continue
+
+            for filename in filenames:
+                local_file = os.path.join(current_dir, filename)
+                try:
+                    too_large = os.path.getsize(local_file) > _MAX_RAW_OUTPUT_BYTES
+                    if too_large:
+                        continue
+                    with open(local_file, 'rb') as handle:
+                        payload = handle.read()
+                    target_file = posixpath.join(target_dir, filename)
+                    await sandbox.files.write(target_file, payload)
+                except Exception as exc:
+                    self.logger.debug(f'Failed to sync host file to E2B {local_file}: {exc}')
+
+    async def _sync_e2b_tree_to_host(self, sandbox, *, remote_root: str, host_root: str) -> None:
+        """Best-effort download of an E2B mount into the matching host path.
+
+        Lists ``remote_root`` (up to 16 levels deep) and recreates each listed
+        directory and file below ``host_root``. Entries that would resolve
+        outside ``host_root`` are ignored. Failures are logged at debug level
+        and never raised.
+
+        Args:
+            sandbox: Connected sandbox object providing ``files.list`` and
+                ``files.read``.
+            remote_root: Directory inside the sandbox to copy from. A trailing
+                slash is tolerated.
+            host_root: Host directory to copy into; created if missing.
+        """
+        os.makedirs(host_root, exist_ok=True)
+        try:
+            entries = await sandbox.files.list(remote_root, depth=16)
+        except Exception as exc:
+            self.logger.debug(f'Failed to list E2B mount for sync {remote_root}: {exc}')
+            return
+
+        # Loop-invariant values, resolved once instead of once per entry.
+        real_host_root = os.path.realpath(host_root)
+        # Match on the root without a trailing slash; otherwise a root such as
+        # '/home/user/workspace/' would never prefix-match any listed entry.
+        remote_prefix = remote_root.rstrip('/') + '/'
+
+        for entry in entries:
+            remote_path = str(getattr(entry, 'path', '') or '')
+            # Also rejects an empty path and the root entry itself.
+            if not remote_path.startswith(remote_prefix):
+                continue
+            rel_path = remote_path[len(remote_prefix):].lstrip('/')
+            if not rel_path:
+                continue
+            host_path = os.path.realpath(os.path.join(real_host_root, *rel_path.split('/')))
+            # Refuse anything that resolves outside the host root (e.g. via '..' or symlinks).
+            if not (host_path == real_host_root or host_path.startswith(real_host_root + os.sep)):
+                continue
+
+            entry_type = getattr(getattr(entry, 'type', None), 'value', '')
+            try:
+                if entry_type == 'dir':
+                    os.makedirs(host_path, exist_ok=True)
+                elif entry_type == 'file':
+                    os.makedirs(os.path.dirname(host_path), exist_ok=True)
+                    data = await sandbox.files.read(remote_path, format='bytes')
+                    with open(host_path, 'wb') as f:
+                        f.write(bytes(data))
+            except Exception as exc:
+                self.logger.debug(f'Failed to sync E2B file to host {remote_path}: {exc}')
+
+    async def stop_session(self, session: BoxSessionInfo):
+        """Ask E2B to kill the sandbox behind ``session``.
+
+        Returns without contacting E2B when the e2b package is unavailable.
+        A failing kill is logged as a warning and not raised.
+        """
+        sandbox_id = session.backend_session_id
+        self.logger.info(
+            f'LangBot Box backend stop_session: backend=e2b '
+            f'session_id={session.session_id} sandbox_id={sandbox_id}'
+        )
+
+        if _check_e2b_available():
+            try:
+                await _AsyncSandbox.kill(
+                    sandbox_id=sandbox_id,
+                    api_key=self._api_key,
+                    domain=self._api_url,
+                )
+            except Exception as exc:
+                self.logger.warning(f'Failed to kill E2B sandbox: {exc}')
+
+    def _truncate_output(self, output: str, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
+        """Clip ``output`` so its UTF-8 encoding fits within ``limit`` bytes.
+
+        The size check and the cut are both done on the encoded bytes, so
+        multi-byte text cannot exceed the limit the way a character-based
+        slice would allow.
+
+        Args:
+            output: Text to clip.
+            limit: Maximum number of UTF-8 bytes of ``output`` to keep.
+
+        Returns:
+            ``output`` unchanged when it fits; otherwise the clipped text
+            followed by a marker line stating the limit.
+        """
+        encoded = output.encode('utf-8', errors='replace')
+        if len(encoded) <= limit:
+            return output
+        # Cut on the byte budget; 'ignore' drops a multi-byte character that
+        # the cut split in half instead of raising.
+        clipped = encoded[:limit].decode('utf-8', errors='ignore')
+        return f'{clipped}\n... [output clipped at {limit} bytes]'
diff --git a/src/langbot_plugin/box/errors.py b/src/langbot_plugin/box/errors.py
new file mode 100644
index 0000000..ecdde7a
--- /dev/null
+++ b/src/langbot_plugin/box/errors.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+
+class BoxError(RuntimeError):
+    """Base error for LangBot Box failures.
+
+    Every other exception in this module derives from it, so catching
+    BoxError covers all of them.
+    """
+
+
+class BoxValidationError(BoxError):
+    """Raised when exec tool arguments are invalid."""
+
+
+class BoxBackendUnavailableError(BoxError):
+    """Raised when no supported container backend is available."""
+
+
+class BoxRuntimeUnavailableError(BoxError):
+    """Raised when the standalone Box Runtime service is unavailable."""
+
+
+class BoxSessionConflictError(BoxError):
+    """Raised when an existing session cannot satisfy a new request."""
+
+
+class BoxSessionNotFoundError(BoxError):
+    """Raised when a referenced session does not exist."""
+
+
+class BoxManagedProcessConflictError(BoxError):
+    """Raised when a session already has an active managed process."""
+
+
+class BoxManagedProcessNotFoundError(BoxError):
+    """Raised when a referenced managed process does not exist."""
diff --git a/src/langbot_plugin/box/models.py b/src/langbot_plugin/box/models.py
new file mode 100644
index 0000000..fa34e36
--- /dev/null
+++ b/src/langbot_plugin/box/models.py
@@ -0,0 +1,331 @@
+from __future__ import annotations
+
+import datetime as dt
+import enum
+import ntpath
+import posixpath
+
+import pydantic
+
+
+# Defaults shared by the models below: the sandbox image name and the path
+# inside the sandbox that serves as both default mount point and default workdir.
+DEFAULT_BOX_IMAGE = 'rockchin/langbot-sandbox:latest'
+DEFAULT_BOX_MOUNT_PATH = '/workspace'
+
+
+class BoxNetworkMode(str, enum.Enum):
+    """Network setting requested for a sandbox; string-valued ('off' / 'on')."""
+
+    OFF = 'off'
+    ON = 'on'
+
+
+class BoxExecutionStatus(str, enum.Enum):
+    """How a command run ended.
+
+    COMPLETED only means the command finished; the exit code is reported
+    separately on the execution result.
+    """
+
+    COMPLETED = 'completed'
+    TIMED_OUT = 'timed_out'
+
+
+class BoxHostMountMode(str, enum.Enum):
+    """Access mode of a host path mount: disabled, read-only or read-write."""
+
+    NONE = 'none'
+    READ_ONLY = 'ro'
+    READ_WRITE = 'rw'
+
+
+class BoxManagedProcessStatus(str, enum.Enum):
+    """State of a managed process: still running or already exited."""
+
+    RUNNING = 'running'
+    EXITED = 'exited'
+
+
+class BoxMountSpec(pydantic.BaseModel):
+    """A single additional bind mount specification."""
+
+    host_path: str
+    mount_path: str
+    mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+
+    @pydantic.field_validator('host_path')
+    @classmethod
+    def validate_host_path(cls, value: str) -> str:
+        # Accept either POSIX-style or Windows-style absolute host paths.
+        stripped = value.strip()
+        is_absolute = posixpath.isabs(stripped) or ntpath.isabs(stripped)
+        if is_absolute:
+            return stripped
+        raise ValueError('host_path must be an absolute host path')
+
+    @pydantic.field_validator('mount_path')
+    @classmethod
+    def validate_mount_path(cls, value: str) -> str:
+        # Paths inside the sandbox are always POSIX-style.
+        stripped = value.strip()
+        if stripped.startswith('/'):
+            return stripped
+        raise ValueError('mount_path must be an absolute path inside the sandbox')
+
+
+class BoxSpec(pydantic.BaseModel):
+    # Full description of one sandbox request: the command to run plus the
+    # session, mounts and resource limits it should run with.
+
+    # Command text; surrounding whitespace is stripped by validate_cmd.
+    cmd: str = ''
+    # Working directory inside the sandbox; falls back to mount_path when
+    # missing or empty (see populate_workdir_from_mount_path).
+    workdir: str = DEFAULT_BOX_MOUNT_PATH
+    timeout_sec: int = 30
+    network: BoxNetworkMode = BoxNetworkMode.OFF
+    session_id: str
+    env: dict[str, str] = pydantic.Field(default_factory=dict)
+    image: str = DEFAULT_BOX_IMAGE
+    # Primary host directory to expose at mount_path; None means no host mount.
+    host_path: str | None = None
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+    mount_path: str = DEFAULT_BOX_MOUNT_PATH
+    extra_mounts: list[BoxMountSpec] = pydantic.Field(default_factory=list)
+    persistent: bool = False
+    # Resource limits
+    cpus: float = 1.0
+    memory_mb: int = 512
+    pids_limit: int = 128
+    read_only_rootfs: bool = True
+    # Must be >= 0. NOTE(review): 0 presumably means "no quota" - confirm with the backends.
+    workspace_quota_mb: int = 0
+
+    @pydantic.model_validator(mode='before')
+    @classmethod
+    def populate_workdir_from_mount_path(cls, data):
+        # Runs on the raw input before field validation. Non-dict input and
+        # input with an explicit, non-empty workdir pass through untouched.
+        if not isinstance(data, dict):
+            return data
+        if data.get('workdir') not in (None, ''):
+            return data
+        mount_path = data.get('mount_path')
+        if isinstance(mount_path, str) and mount_path.strip():
+            # Copy first so the caller's dict is not mutated.
+            data = dict(data)
+            data['workdir'] = mount_path
+        return data
+
+    @pydantic.field_validator('cmd')
+    @classmethod
+    def validate_cmd(cls, value: str) -> str:
+        # Only normalises whitespace; an empty command is accepted here.
+        return value.strip()
+
+    @pydantic.field_validator('workdir')
+    @classmethod
+    def validate_workdir(cls, value: str) -> str:
+        value = value.strip()
+        if not value.startswith('/'):
+            raise ValueError('workdir must be an absolute path inside the sandbox')
+        return value
+
+    @pydantic.field_validator('timeout_sec')
+    @classmethod
+    def validate_timeout_sec(cls, value: int) -> int:
+        if value <= 0:
+            raise ValueError('timeout_sec must be greater than 0')
+        return value
+
+    @pydantic.field_validator('cpus')
+    @classmethod
+    def validate_cpus(cls, value: float) -> float:
+        if value <= 0:
+            raise ValueError('cpus must be greater than 0')
+        return value
+
+    @pydantic.field_validator('memory_mb')
+    @classmethod
+    def validate_memory_mb(cls, value: int) -> int:
+        if value < 32:
+            raise ValueError('memory_mb must be at least 32')
+        return value
+
+    @pydantic.field_validator('pids_limit')
+    @classmethod
+    def validate_pids_limit(cls, value: int) -> int:
+        if value < 1:
+            raise ValueError('pids_limit must be at least 1')
+        return value
+
+    @pydantic.field_validator('workspace_quota_mb')
+    @classmethod
+    def validate_workspace_quota_mb(cls, value: int) -> int:
+        if value < 0:
+            raise ValueError('workspace_quota_mb must be greater than or equal to 0')
+        return value
+
+    @pydantic.field_validator('session_id')
+    @classmethod
+    def validate_session_id(cls, value: str) -> str:
+        value = value.strip()
+        if not value:
+            raise ValueError('session_id must not be empty')
+        return value
+
+    @pydantic.field_validator('env')
+    @classmethod
+    def validate_env(cls, value: dict[str, str]) -> dict[str, str]:
+        # Coerce keys and values to str.
+        return {str(k): str(v) for k, v in value.items()}
+
+    @pydantic.field_validator('host_path')
+    @classmethod
+    def validate_host_path(cls, value: str | None) -> str | None:
+        if value is None:
+            return None
+        value = value.strip()
+        # Accept either POSIX-style or Windows-style absolute host paths.
+        if not (posixpath.isabs(value) or ntpath.isabs(value)):
+            raise ValueError('host_path must be an absolute host path')
+        return value
+
+    @pydantic.field_validator('mount_path')
+    @classmethod
+    def validate_mount_path(cls, value: str) -> str:
+        value = value.strip()
+        if not value.startswith('/'):
+            raise ValueError('mount_path must be an absolute path inside the sandbox')
+        return value
+
+    @pydantic.model_validator(mode='after')
+    def validate_host_mount_consistency(self) -> 'BoxSpec':
+        # Only enforced when a host mount is actually in effect.
+        if self.host_path is None:
+            return self
+        if self.host_path_mode == BoxHostMountMode.NONE:
+            return self
+        # NOTE(review): this is a plain prefix test. A mount_path with a
+        # trailing slash (or '/') rejects its own subdirectories, and '..'
+        # segments in workdir are not normalised - confirm this is intended.
+        if self.workdir != self.mount_path and not self.workdir.startswith(f'{self.mount_path}/'):
+            raise ValueError('workdir must stay under mount_path when host_path is provided')
+        return self
+
+
+class BoxProfile(pydantic.BaseModel):
+    """Preset sandbox configuration.
+
+    Provides default values for BoxSpec fields and optionally locks fields
+    so that tool-call parameters cannot override them.
+    """
+
+    name: str
+    image: str = DEFAULT_BOX_IMAGE
+    network: BoxNetworkMode = BoxNetworkMode.OFF
+    timeout_sec: int = 30
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+    # NOTE(review): presumably the upper bound a caller-supplied timeout is
+    # clamped to - the enforcing code is not in this module, confirm.
+    max_timeout_sec: int = 120
+    # Resource limits
+    cpus: float = 1.0
+    memory_mb: int = 512
+    pids_limit: int = 128
+    read_only_rootfs: bool = True
+    workspace_quota_mb: int = 0
+    # Names of the fields that are locked (see the class docstring).
+    locked: frozenset[str] = frozenset()
+
+    # Instances are immutable once created.
+    model_config = pydantic.ConfigDict(frozen=True)
+
+
+# Profiles shipped with the package, keyed by profile name.
+BUILTIN_PROFILES: dict[str, BoxProfile] = {
+    # No network, read-write host mount, nothing locked.
+    'default': BoxProfile(
+        name='default',
+        network=BoxNetworkMode.OFF,
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        cpus=1.0,
+        memory_mb=512,
+        pids_limit=128,
+        read_only_rootfs=True,
+        max_timeout_sec=120,
+    ),
+    # Smallest limits; network, host mount mode and rootfs setting are locked.
+    'offline_readonly': BoxProfile(
+        name='offline_readonly',
+        network=BoxNetworkMode.OFF,
+        host_path_mode=BoxHostMountMode.READ_ONLY,
+        cpus=0.5,
+        memory_mb=256,
+        pids_limit=64,
+        read_only_rootfs=True,
+        max_timeout_sec=60,
+        locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}),
+    ),
+    # Same limits as 'default' but with network access enabled.
+    'network_basic': BoxProfile(
+        name='network_basic',
+        network=BoxNetworkMode.ON,
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        cpus=1.0,
+        memory_mb=512,
+        pids_limit=128,
+        read_only_rootfs=True,
+        max_timeout_sec=120,
+    ),
+    # Network enabled, doubled limits, writable rootfs and a longer max timeout.
+    'network_extended': BoxProfile(
+        name='network_extended',
+        network=BoxNetworkMode.ON,
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        cpus=2.0,
+        memory_mb=1024,
+        pids_limit=256,
+        read_only_rootfs=False,
+        max_timeout_sec=300,
+    ),
+}
+
+
+class BoxSessionInfo(pydantic.BaseModel):
+    # Record describing a started sandbox session and the settings it was
+    # created with.
+
+    # LangBot-side session identifier.
+    session_id: str
+    # Name of the backend that created the session.
+    backend_name: str
+    # The backend's own identifier for the sandbox.
+    backend_session_id: str
+    image: str
+    network: BoxNetworkMode
+    host_path: str | None = None
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+    mount_path: str = DEFAULT_BOX_MOUNT_PATH
+    persistent: bool = False
+    cpus: float = 1.0
+    memory_mb: int = 512
+    pids_limit: int = 128
+    read_only_rootfs: bool = True
+    workspace_quota_mb: int = 0
+    created_at: dt.datetime
+    last_used_at: dt.datetime
+
+
+class BoxManagedProcessSpec(pydantic.BaseModel):
+    # Request to start a managed process inside a sandbox session.
+
+    process_id: str = 'default'
+    command: str
+    args: list[str] = pydantic.Field(default_factory=list)
+    env: dict[str, str] = pydantic.Field(default_factory=dict)
+    cwd: str = DEFAULT_BOX_MOUNT_PATH
+
+    @pydantic.field_validator('command')
+    @classmethod
+    def validate_command(cls, value: str) -> str:
+        # Whitespace-only commands count as empty.
+        stripped = value.strip()
+        if stripped:
+            return stripped
+        raise ValueError('command must not be empty')
+
+    @pydantic.field_validator('args')
+    @classmethod
+    def validate_args(cls, value: list[str]) -> list[str]:
+        # Coerce every argument to str.
+        return list(map(str, value))
+
+    @pydantic.field_validator('env')
+    @classmethod
+    def validate_env(cls, value: dict[str, str]) -> dict[str, str]:
+        # Coerce keys and values to str.
+        coerced: dict[str, str] = {}
+        for key, val in value.items():
+            coerced[str(key)] = str(val)
+        return coerced
+
+    @pydantic.field_validator('cwd')
+    @classmethod
+    def validate_cwd(cls, value: str) -> str:
+        stripped = value.strip()
+        if stripped.startswith('/'):
+            return stripped
+        raise ValueError('cwd must be an absolute path inside the sandbox')
+
+
+class BoxManagedProcessInfo(pydantic.BaseModel):
+    # Status record for a managed process running in a sandbox session.
+
+    session_id: str
+    process_id: str = 'default'
+    status: BoxManagedProcessStatus
+    command: str
+    args: list[str]
+    cwd: str
+    # Only the variable names are carried here; there is no field for values.
+    env_keys: list[str]
+    # NOTE(review): presumably whether a client is currently attached to the
+    # process - the code that sets it is not in this module, confirm.
+    attached: bool = False
+    started_at: dt.datetime
+    # exited_at and exit_code default to None.
+    exited_at: dt.datetime | None = None
+    exit_code: int | None = None
+    stderr_preview: str = ''
+
+
+class BoxExecutionResult(pydantic.BaseModel):
+    # Outcome of one command run in a sandbox session.
+
+    session_id: str
+    backend_name: str
+    status: BoxExecutionStatus
+    # Required, but may be None.
+    exit_code: int | None
+    stdout: str = ''
+    stderr: str = ''
+    duration_ms: int
+
+    @property
+    def ok(self) -> bool:
+        # Success requires both a completed run and a zero exit code.
+        if self.status != BoxExecutionStatus.COMPLETED:
+            return False
+        return self.exit_code == 0
diff --git a/src/langbot_plugin/box/nsjail_backend.py b/src/langbot_plugin/box/nsjail_backend.py
new file mode 100644
index 0000000..e8eced6
--- /dev/null
+++ b/src/langbot_plugin/box/nsjail_backend.py
@@ -0,0 +1,552 @@
+from __future__ import annotations
+
+import asyncio
+import datetime as dt
+import json
+import logging
+import os
+import pathlib
+import shlex
+import shutil
+import signal
+import uuid
+
+from .backend import BaseSandboxBackend, _CommandResult, _MAX_RAW_OUTPUT_BYTES
+from .errors import BoxError
+from .models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
+from .security import validate_sandbox_security
+
+# System directories to mount read-only inside the sandbox.
+# Only well-known paths needed for running Python/Node/shell commands.
+_READONLY_SYSTEM_MOUNTS: list[str] = [
+    '/usr',
+    '/lib',
+    '/lib64',
+    '/bin',
+    '/sbin',
+]
+
+# Specific /etc entries required for dynamic linking and TLS.
+_READONLY_ETC_ENTRIES: list[str] = [
+    '/etc/alternatives',
+    '/etc/ld.so.cache',
+    '/etc/ld.so.conf',
+    '/etc/ld.so.conf.d',
+    '/etc/ssl/certs',
+    '/etc/localtime',
+    '/etc/resolv.conf',  # needed when network=ON; skipped by _build_readonly_mounts when it is OFF
+]
+
+_DEFAULT_BASE_DIR = '/tmp/langbot-box-nsjail'  # default parent directory of all per-session dirs
+
+
+class NsjailBackend(BaseSandboxBackend):
+    """Lightweight sandbox backend using nsjail.
+
+    Each ``exec`` invocation spawns an independent nsjail process.  Session
+    state (workspace files) persists via a shared host directory that is
+    bind-mounted into every invocation.
+    """
+
+    name = 'nsjail'
+
+    def __init__(
+        self,
+        logger: logging.Logger,
+        nsjail_bin: str = 'nsjail',
+        base_dir: str = _DEFAULT_BASE_DIR,
+    ):
+        super().__init__(logger)
+        self._nsjail_bin = nsjail_bin  # name or path of the binary; looked up with shutil.which in is_available()
+        self._base_dir = pathlib.Path(base_dir)  # session directories are created beneath this path
+        self._cgroup_v2_available: bool = False  # stays False until is_available() runs the cgroup probe
+
+    # ── lifecycle ─────────────────────────────────────────────────────
+
+    async def is_available(self) -> bool:
+        """Probe nsjail and prepare the base dir; return False instead of raising."""
+        if shutil.which(self._nsjail_bin) is None:
+            self.logger.info('nsjail binary not found in PATH')
+            return False
+        proc = None
+        try:
+            # Quick sanity check – nsjail --help exits 0.
+            proc = await asyncio.create_subprocess_exec(
+                self._nsjail_bin, '--help',
+                stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.DEVNULL,
+            )
+            await asyncio.wait_for(proc.wait(), timeout=5)
+            if proc.returncode != 0:
+                self.logger.info('nsjail --help returned non-zero')
+                return False
+            # An unusable base dir means "backend unavailable", so create it
+            # inside the guard rather than letting OSError reach the caller.
+            self._base_dir.mkdir(parents=True, exist_ok=True)
+        except Exception as exc:
+            self.logger.info(f'nsjail probe failed: {exc}')
+            return False
+        finally:
+            if proc is not None and proc.returncode is None:
+                proc.kill()  # probe hung past the timeout; do not leave it running
+        self._cgroup_v2_available = self._detect_cgroup_v2()
+        if not self._cgroup_v2_available:
+            self.logger.warning('cgroup v2 not available for nsjail; falling back to rlimit-based resource limits')
+        return True
+
+    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
+        validate_sandbox_security(spec)  # runs before any directory is created
+
+        now = dt.datetime.now(dt.timezone.utc)
+        session_dir_name = f'{self.instance_id}_{spec.session_id}_{uuid.uuid4().hex[:8]}'
+        session_dir = self._base_dir / session_dir_name
+
+        # Per-session writable directories.
+        root_dir = session_dir / 'root'
+        workspace_dir = session_dir / 'workspace'
+        tmp_dir = session_dir / 'tmp'
+        home_dir = session_dir / 'home'
+
+        for d in (root_dir, workspace_dir, tmp_dir, home_dir):
+            d.mkdir(parents=True, exist_ok=True)
+
+        # If host_path is set (with a mode other than NONE), it is mounted instead of
+        # the per-session workspace when nsjail args are built (see _build_writable_mounts).
+        meta = {
+            'session_id': spec.session_id,
+            'instance_id': self.instance_id,
+            'host_path': spec.host_path,
+            'host_path_mode': spec.host_path_mode.value if spec.host_path else None,  # None when no host_path
+            'mount_path': spec.mount_path,
+            'network': spec.network.value,
+            'cpus': spec.cpus,
+            'memory_mb': spec.memory_mb,
+            'pids_limit': spec.pids_limit,
+            'created_at': now.isoformat(),
+        }
+        (session_dir / 'meta.json').write_text(json.dumps(meta, indent=2))  # not read back anywhere in this class
+
+        self.logger.info(
+            f'LangBot Box backend start_session: backend=nsjail '
+            f'session_id={spec.session_id} session_dir={session_dir} '
+            f'network={spec.network.value} '
+            f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} mount_path={spec.mount_path} '
+            f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} '
+            f'workspace_quota_mb={spec.workspace_quota_mb}'
+        )
+
+        return BoxSessionInfo(
+            session_id=spec.session_id,
+            backend_name=self.name,
+            backend_session_id=str(session_dir),  # exec/stop_session turn this back into the session dir path
+            # Keep the requested logical image in metadata so runtime session
+            # reuse sees later specs as compatible. nsjail still executes
+            # against host-mounted system paths rather than a container image.
+            image=spec.image,
+            network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
+            mount_path=spec.mount_path,
+            cpus=spec.cpus,
+            memory_mb=spec.memory_mb,
+            pids_limit=spec.pids_limit,
+            read_only_rootfs=spec.read_only_rootfs,
+            workspace_quota_mb=spec.workspace_quota_mb,
+            persistent=spec.persistent,
+            created_at=now,
+            last_used_at=now,
+        )
+
+    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
+        start = dt.datetime.now(dt.timezone.utc)
+        session_dir = pathlib.Path(session.backend_session_id)  # start_session stored the session dir path here
+
+        args = self._build_nsjail_args(session, spec, session_dir)
+
+        cmd_preview = spec.cmd.strip()
+        if len(cmd_preview) > 400:  # the logged command is capped at 400 characters
+            cmd_preview = f'{cmd_preview[:397]}...'
+        self.logger.info(
+            f'LangBot Box backend exec: backend=nsjail '
+            f'session_id={session.session_id} session_dir={session_dir} '
+            f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} '
+            f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
+        )
+
+        result = await self._run_nsjail(args, timeout_sec=spec.timeout_sec)
+        duration_ms = int((dt.datetime.now(dt.timezone.utc) - start).total_seconds() * 1000)  # includes arg building and logging
+
+        if result.timed_out:
+            return BoxExecutionResult(
+                session_id=session.session_id,
+                backend_name=self.name,
+                status=BoxExecutionStatus.TIMED_OUT,
+                exit_code=None,  # the -1 placeholder set by _run_nsjail is not surfaced
+                stdout=result.stdout,
+                stderr=result.stderr or f'Command timed out after {spec.timeout_sec} seconds.',  # fallback only when stderr is empty
+                duration_ms=duration_ms,
+            )
+
+        return BoxExecutionResult(
+            session_id=session.session_id,
+            backend_name=self.name,
+            status=BoxExecutionStatus.COMPLETED,  # COMPLETED even for a non-zero exit code
+            exit_code=result.return_code,
+            stdout=result.stdout,
+            stderr=result.stderr,
+            duration_ms=duration_ms,
+        )
+
+    async def stop_session(self, session: BoxSessionInfo):
+        session_dir = pathlib.Path(session.backend_session_id)
+        self.logger.info(
+            f'LangBot Box backend stop_session: backend=nsjail '
+            f'session_id={session.session_id} session_dir={session_dir}'
+        )
+
+        # Kill any lingering nsjail processes whose command line mentions session_dir.
+        await self._kill_session_processes(session_dir)
+
+        try:
+            if session_dir.exists():
+                shutil.rmtree(session_dir)
+        except Exception as exc:  # best-effort: a failed removal is only logged
+            self.logger.warning(f'Failed to remove nsjail session dir {session_dir}: {exc}')
+
+    async def start_managed_process(
+        self, session: BoxSessionInfo, spec
+    ) -> asyncio.subprocess.Process:
+        session_dir = pathlib.Path(session.backend_session_id)
+
+        # Build a BoxSpec so we can reuse _build_nsjail_args. ``spec`` supplies
+        # command/args/cwd/env; network, mounts and limits come from the session.
+        inner_cmd = ' '.join([shlex.quote(spec.command), *[shlex.quote(a) for a in spec.args]])  # each token shell-quoted
+        pseudo_spec = BoxSpec(
+            cmd=inner_cmd,
+            workdir=spec.cwd,
+            timeout_sec=86400,  # not used here
+            network=session.network,
+            session_id=session.session_id,
+            env=spec.env,
+            host_path=session.host_path,
+            host_path_mode=session.host_path_mode,
+            mount_path=session.mount_path,
+            cpus=session.cpus,
+            memory_mb=session.memory_mb,
+            pids_limit=session.pids_limit,
+            read_only_rootfs=session.read_only_rootfs,
+        )  # extra_mounts is not forwarded here
+
+        args = self._build_nsjail_args(session, pseudo_spec, session_dir)
+
+        self.logger.info(
+            f'LangBot Box backend start_managed_process: backend=nsjail '
+            f'session_id={session.session_id} session_dir={session_dir} '
+            f'cwd={spec.cwd} env_keys={sorted(spec.env.keys())} '
+            f'command={spec.command} args={spec.args}'
+        )
+
+        return await asyncio.create_subprocess_exec(  # returned still running; all three stdio streams are pipes
+            *args,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+    async def cleanup_orphaned_containers(self, current_instance_id: str = ''):
+        if not self._base_dir.exists():
+            return
+
+        for entry in self._base_dir.iterdir():
+            if not entry.is_dir():
+                continue
+
+            # Session dirs are named: <instance_id>_<session_id>_<suffix>
+            # Every directory without the current instance prefix is treated as orphaned and removed.
+            if entry.name.startswith(f'{current_instance_id}_'):  # with the default '' this keeps only names starting with '_'
+                continue
+
+            self.logger.info(f'Cleaning up orphaned nsjail session dir: {entry}')
+            try:
+                await self._kill_session_processes(entry)
+                shutil.rmtree(entry)
+            except Exception as exc:  # one failing entry does not stop the sweep
+                self.logger.warning(f'Failed to clean up orphaned nsjail dir {entry}: {exc}')
+
+    # ── nsjail argument construction ──────────────────────────────────
+
+    def _build_nsjail_args(
+        self,
+        session: BoxSessionInfo,
+        spec: BoxSpec,
+        session_dir: pathlib.Path,
+    ) -> list[str]:
+        """Return the full nsjail argv that runs ``spec.cmd`` inside the session sandbox."""
+        args: list[str] = [self._nsjail_bin]
+        # Mode: one-shot execution.
+        args.extend(['--mode', 'o'])
+        # nsjail kills a jail after 600 s of wall time by default, which would
+        # end managed processes and long execs early; callers own the timeout.
+        args.extend(['--time_limit', '0'])
+
+        # nsjail enables the relevant clone namespaces by default. Some
+        # versions do not expose positive --clone_new* flags, only disable
+        # flags, so rely on defaults for broad compatibility.
+
+        # Use a per-session chroot root so nsjail can create mount targets
+        # without needing write access to the host root.
+        root_dir = session_dir / 'root'
+        root_dir.mkdir(parents=True, exist_ok=True)
+        self._ensure_chroot_mount_targets(root_dir, session, spec)
+        args.extend(['--chroot', str(root_dir)])
+
+        # Network namespace.
+        if spec.network != BoxNetworkMode.OFF:
+            args.append('--disable_clone_newnet')
+
+        # Read-only system mounts, then writable per-session mounts.
+        args.extend(self._build_readonly_mounts(spec.network))
+        args.extend(self._build_writable_mounts(session, spec, session_dir))
+
+        # Isolated /proc and minimal /dev.
+        args.extend(['--mount', 'none:/proc:proc:rw'])
+        args.extend(['--mount', 'none:/dev:tmpfs:rw'])
+
+        # Working directory.
+        args.extend(['--cwd', spec.workdir])
+
+        # Environment variables.
+        args.extend(['--env', 'PYTHONUNBUFFERED=1'])
+        args.extend(['--env', 'HOME=/home'])
+        args.extend(['--env', 'PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin'])
+        for key, value in spec.env.items():
+            args.extend(['--env', f'{key}={value}'])
+
+        # Resource limits.
+        args.extend(self._build_resource_limits(spec))
+
+        # Suppress nsjail's own log output.
+        args.append('--really_quiet')
+
+        # The actual command.
+        quoted_workdir = shlex.quote(spec.workdir)
+        user_cmd = f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {spec.cmd}'
+        args.extend(['--', '/bin/sh', '-lc', user_cmd])
+        return args
+
+    def _build_readonly_mounts(self, network: BoxNetworkMode) -> list[str]:
+        args: list[str] = []
+
+        for path in _READONLY_SYSTEM_MOUNTS:
+            if os.path.exists(path):  # paths missing on this host are silently skipped
+                args.extend(['--bindmount_ro', f'{path}:{path}'])
+
+        for path in _READONLY_ETC_ENTRIES:
+            # /etc/resolv.conf is only needed when network is ON.
+            if path == '/etc/resolv.conf' and network == BoxNetworkMode.OFF:
+                continue
+            if os.path.exists(path):
+                args.extend(['--bindmount_ro', f'{path}:{path}'])  # same path on the host and in the sandbox
+
+        return args
+
+    def _build_writable_mounts(
+        self,
+        session: BoxSessionInfo,
+        spec: BoxSpec,
+        session_dir: pathlib.Path,
+    ) -> list[str]:  # ``session`` is accepted but not read in this method
+        args: list[str] = []
+
+        # Workspace mount.
+        if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE:
+            if spec.host_path_mode == BoxHostMountMode.READ_ONLY:
+                args.extend(['--bindmount_ro', f'{spec.host_path}:{spec.mount_path}'])
+            else:
+                args.extend(['--bindmount', f'{spec.host_path}:{spec.mount_path}'])
+        else:  # no usable host_path: mount the per-session workspace dir read-write
+            workspace_dir = session_dir / 'workspace'
+            args.extend(['--bindmount', f'{workspace_dir}:{spec.mount_path}'])
+
+        for mount in spec.extra_mounts:
+            if mount.mode == BoxHostMountMode.READ_ONLY:
+                args.extend(['--bindmount_ro', f'{mount.host_path}:{mount.mount_path}'])
+            elif mount.mode == BoxHostMountMode.READ_WRITE:  # any other mode adds no mount
+                args.extend(['--bindmount', f'{mount.host_path}:{mount.mount_path}'])
+
+        # /tmp and /home are always per-session writable.
+        tmp_dir = session_dir / 'tmp'
+        home_dir = session_dir / 'home'
+        args.extend(['--bindmount', f'{tmp_dir}:/tmp'])
+        args.extend(['--bindmount', f'{home_dir}:/home'])
+
+        return args
+
+    def _ensure_chroot_mount_targets(
+        self,
+        root_dir: pathlib.Path,
+        session: BoxSessionInfo,
+        spec: BoxSpec,
+    ) -> None:  # creates an empty file or directory under root_dir for every mount destination
+        mount_paths = {  # a set, so repeated paths collapse to one entry
+            '/proc',
+            '/dev',
+            '/tmp',
+            '/home',
+            spec.mount_path,
+            session.mount_path,
+        }
+        mount_paths.update(_READONLY_SYSTEM_MOUNTS)
+        mount_paths.update(_READONLY_ETC_ENTRIES)
+        for mount in spec.extra_mounts:
+            mount_paths.add(mount.mount_path)
+
+        for mount_path in mount_paths:
+            if not mount_path:
+                continue
+            target = root_dir / mount_path.lstrip('/')  # re-root the absolute sandbox path under root_dir
+            try:
+                if os.path.isfile(mount_path):  # NOTE(review): tests the sandbox path on the host, not a mount's host_path - confirm for extra_mounts
+                    target.parent.mkdir(parents=True, exist_ok=True)
+                    target.touch(exist_ok=True)
+                else:
+                    target.mkdir(parents=True, exist_ok=True)
+            except Exception as exc:  # best-effort: failures are logged at debug level only
+                self.logger.debug(f'Failed to prepare nsjail mount target {target}: {exc}')
+
+    def _build_resource_limits(self, spec: BoxSpec) -> list[str]:
+        """Translate the spec's resource limits into nsjail command-line flags."""
+        args: list[str] = []
+        if self._cgroup_v2_available:
+            # nsjail targets the cgroup v1 hierarchy unless told otherwise, so
+            # the v2 hierarchy detected at startup must be requested explicitly.
+            args.append('--use_cgroupv2')
+            args.extend(['--cgroup_mem_max', str(spec.memory_mb * 1024 * 1024)])  # bytes
+            args.extend(['--cgroup_pids_max', str(spec.pids_limit)])
+            args.extend(['--cgroup_cpu_ms_per_sec', str(int(spec.cpus * 1000))])
+        else:
+            # rlimit fallback – best-effort.
+            args.extend(['--rlimit_as', str(spec.memory_mb)])
+            args.extend(['--rlimit_nproc', str(spec.pids_limit)])
+
+        # Always set these rlimits regardless of cgroup mode.
+        args.extend(['--rlimit_fsize', '512'])    # max file size 512 MB
+        args.extend(['--rlimit_nofile', '256'])    # max open fds
+
+        return args
+
+    # ── process execution ─────────────────────────────────────────────
+
+    async def _run_nsjail(self, args: list[str], timeout_sec: int) -> _CommandResult:
+        """Run one nsjail command line and collect its (clipped) stdout/stderr.
+
+        The process is killed on timeout and on cancellation of this coroutine.
+        """
+        process = await asyncio.create_subprocess_exec(
+            *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
+        )
+        stdout_task = asyncio.create_task(self._read_stream(process.stdout))
+        stderr_task = asyncio.create_task(self._read_stream(process.stderr))
+        timed_out = False
+        try:
+            await asyncio.wait_for(process.wait(), timeout=timeout_sec)
+        except asyncio.TimeoutError:
+            timed_out = True
+        finally:
+            if process.returncode is None:
+                try:
+                    process.kill()
+                except ProcessLookupError:
+                    pass  # exited on its own just before the kill
+        await process.wait()
+        stdout_bytes, stdout_total = await stdout_task
+        stderr_bytes, stderr_total = await stderr_task
+        return _CommandResult(
+            return_code=process.returncode if not timed_out else -1,
+            stdout=self._clip_captured_bytes(stdout_bytes, stdout_total),
+            stderr=self._clip_captured_bytes(stderr_bytes, stderr_total),
+            timed_out=timed_out,
+        )
+
+    # ── helpers ───────────────────────────────────────────────────────
+
+    @staticmethod
+    def _detect_cgroup_v2() -> bool:
+        """Check whether the host runs cgroup v2 and we can write to it."""
+        cgroup_mount = pathlib.Path('/sys/fs/cgroup')
+        if not cgroup_mount.exists():
+            return False
+        # cgroup v2 has a single hierarchy with cgroup.controllers file.
+        controllers = cgroup_mount / 'cgroup.controllers'
+        if not controllers.exists():
+            return False
+        # Check if we can write to a cgroup subtree (needed for nsjail).
+        # A rough heuristic: if the user owns a cgroup directory we're probably
+        # running under systemd user delegation.
+        user_slice = cgroup_mount / f'user.slice/user-{os.getuid()}.slice'
+        if user_slice.exists() and os.access(user_slice, os.W_OK):
+            return True
+        # If running as root (uid 0), cgroup v2 is always usable.
+        if os.getuid() == 0:  # heuristic only: no write is actually attempted
+            return True
+        # Conservative: if we can't confirm writability, report unavailable.
+        return False
+
+    async def _kill_session_processes(self, session_dir: pathlib.Path) -> None:
+        """Best-effort kill of nsjail processes associated with a session dir.
+
+        We scan /proc for nsjail processes whose command line contains the
+        session directory path.
+        """
+        session_path_str = str(session_dir)
+        proc_dir = pathlib.Path('/proc')
+        if not proc_dir.exists():
+            return
+
+        for pid_dir in proc_dir.iterdir():
+            if not pid_dir.name.isdigit():  # only numeric entries are process directories
+                continue
+            try:
+                cmdline = (pid_dir / 'cmdline').read_bytes().decode('utf-8', errors='replace')
+                if self._nsjail_bin in cmdline and session_path_str in cmdline:  # plain substring match on both
+                    pid = int(pid_dir.name)
+                    os.kill(pid, signal.SIGKILL)
+                    self.logger.info(f'Killed orphaned nsjail process {pid}')
+            except (OSError, ValueError):  # unreadable or already-gone process: skip it
+                continue
+
+    @staticmethod
+    def _clip_captured_bytes(
+        data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES
+    ) -> str:
+        text = data.decode('utf-8', errors='replace').strip()  # invalid bytes become U+FFFD instead of raising
+        if total_size > limit:  # more output was produced than was kept in ``data``
+            text += f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]'
+        return text
+
+    @staticmethod
+    async def _read_stream(
+        stream: asyncio.StreamReader | None,
+        limit: int = _MAX_RAW_OUTPUT_BYTES,
+    ) -> tuple[bytes, int]:  # (kept bytes, total bytes seen)
+        if stream is None:
+            return b'', 0
+
+        chunks = bytearray()
+        total_size = 0
+        while True:
+            chunk = await stream.read(65536)
+            if not chunk:  # EOF
+                break
+            total_size += len(chunk)  # counts every byte, kept or not
+            remaining = limit - len(chunks)  # room left in the kept buffer
+            if remaining > 0:  # once full, keep reading to EOF but discard the data
+                chunks.extend(chunk[:remaining])
+
+        return bytes(chunks), total_size
diff --git a/src/langbot_plugin/box/runtime.py b/src/langbot_plugin/box/runtime.py
new file mode 100644
index 0000000..7550816
--- /dev/null
+++ b/src/langbot_plugin/box/runtime.py
@@ -0,0 +1,598 @@
+from __future__ import annotations
+
+import asyncio
+import collections
+import dataclasses
+import datetime as dt
+import json
+import logging
+import os
+import uuid
+from typing import TYPE_CHECKING
+
+from .backend import BaseSandboxBackend, DockerBackend
+from .nsjail_backend import NsjailBackend
+from .errors import (
+    BoxBackendUnavailableError,
+    BoxManagedProcessNotFoundError,
+    BoxSessionConflictError,
+    BoxSessionNotFoundError,
+    BoxValidationError,
+)
+from .models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxManagedProcessInfo,
+    BoxManagedProcessSpec,
+    BoxManagedProcessStatus,
+    BoxSessionInfo,
+    BoxSpec,
+)
+from .skill_store import BoxSkillStore
+
+if TYPE_CHECKING:
+    from .e2b_backend import E2BSandboxBackend
+
+_UTC = dt.timezone.utc
+_MANAGED_PROCESS_STDERR_PREVIEW_LIMIT = 4000
+
+
+@dataclasses.dataclass(slots=True)
+class _ManagedProcess:
+    spec: BoxManagedProcessSpec
+    process: asyncio.subprocess.Process
+    started_at: dt.datetime
+    attach_lock: asyncio.Lock
+    stderr_chunks: collections.deque[str]
+    stderr_total_len: int = 0
+    exit_code: int | None = None
+    exited_at: dt.datetime | None = None
+
+    @property
+    def is_running(self) -> bool:
+        return self.exit_code is None and self.process.returncode is None  # both the recorded and the live exit code must be unset
+
+
+@dataclasses.dataclass(slots=True)
+class _RuntimeSession:
+    info: BoxSessionInfo
+    lock: asyncio.Lock  # held by BoxRuntime around exec and managed-process start/stop for this session
+    managed_processes: dict[str, _ManagedProcess] = dataclasses.field(  # keyed by process_id
+        default_factory=dict
+    )
+
+
+class BoxRuntime:
+    def __init__(
+        self,
+        logger: logging.Logger,
+        backends: list[BaseSandboxBackend] | None = None,
+        session_ttl_sec: int = 300,
+    ):
+        self.logger = logger
+
+        # Load configuration from environment variable (passed by LangBot)
+        self._box_config: dict = {}
+        config_json = os.getenv("LANGBOT_BOX_CONFIG", "")
+        if config_json:
+            try:
+                self._box_config = json.loads(config_json)
+            except json.JSONDecodeError:  # invalid JSON leaves the config empty
+                logger.warning(  # NOTE(review): echoes the first 100 chars of the raw value - confirm it cannot hold secrets
+                    f"Failed to parse LANGBOT_BOX_CONFIG: {config_json[:100]}"
+                )
+
+        # Build the default backend list when the caller did not supply one
+        if backends is None:
+            backends = [
+                DockerBackend(logger),
+                NsjailBackend(logger),
+                self._create_e2b_backend(logger),  # None when the e2b backend cannot be imported
+            ]
+
+        self.backends = backends
+        self.session_ttl_sec = session_ttl_sec
+        self._backend: BaseSandboxBackend | None = None  # chosen later by initialize() / get_backend_info()
+        self._sessions: dict[str, _RuntimeSession] = {}  # keyed by session_id
+        self._lock = asyncio.Lock()
+        self.instance_id = uuid.uuid4().hex[:12]  # random 12-hex-char id for this runtime instance
+        self.skill_store = BoxSkillStore(self._box_config)
+
+    def _create_e2b_backend(self, logger: logging.Logger) -> "E2BSandboxBackend | None":
+        """Return an E2B backend, or None when importing it raises ImportError."""
+        try:
+            from .e2b_backend import E2BSandboxBackend
+
+            return E2BSandboxBackend(logger)
+        except ImportError:
+            logger.debug("e2b package not installed, E2B backend unavailable")
+            return None
+
+    async def initialize(self):
+        # Apply configuration from env var to all backends
+        if self._box_config:
+            self._apply_config_to_backends(self._box_config)
+
+        self._backend = await self._select_backend()
+        if self._backend is not None:
+            self._backend.instance_id = self.instance_id  # the backend shares this runtime's instance id
+            try:
+                await self._backend.cleanup_orphaned_containers(self.instance_id)
+            except Exception as exc:  # cleanup failure is logged and does not abort initialization
+                self.logger.warning(
+                    f"LangBot Box orphan container cleanup failed: {exc}"
+                )
+
+    def init(self, config: dict) -> None:
+        """Initialize with full box configuration from LangBot.
+
+        Called via RPC (INIT action) when connecting over WebSocket.
+        """
+        self._box_config.update(config)  # shallow merge: top-level keys in ``config`` replace stored ones
+        self._apply_config_to_backends(config)
+        self.skill_store.update_config(self._box_config)
+        if not self._sessions:  # the selected backend is kept while any session exists
+            self._backend = None  # NOTE(review): presumably forces reselection under the new config - confirm
+
+    def _apply_config_to_backends(self, config: dict) -> None:
+        """Pass each backend the config section stored under its ``name``, if it has ``configure``."""
+        for backend in self.backends:
+            if backend is None:  # the default list holds None when the e2b backend is unavailable
+                continue
+            backend_config = config.get(backend.name, {})
+            if backend_config and hasattr(backend, "configure"):  # empty or missing sections are skipped
+                backend.configure(backend_config)
+
+    async def execute(self, spec: BoxSpec) -> BoxExecutionResult:
+        if not spec.cmd:
+            raise BoxValidationError("cmd must not be empty")
+        session = await self._get_or_create_session(spec)
+
+        async with session.lock:  # per-session lock: one command at a time in a session
+            self.logger.info(
+                "LangBot Box execute: "
+                f"session_id={spec.session_id} "
+                f"backend_session_id={session.info.backend_session_id} "
+                f"backend={session.info.backend_name} "
+                f"workdir={spec.workdir} "
+                f"timeout_sec={spec.timeout_sec}"
+            )
+            result = await (await self._get_backend()).exec(session.info, spec)
+
+        async with self._lock:  # runtime-wide lock, taken only after the session lock is released
+            now = dt.datetime.now(_UTC)
+            if spec.session_id in self._sessions:  # the session may have been removed in the meantime
+                self._sessions[spec.session_id].info.last_used_at = now
+
+            if result.status == BoxExecutionStatus.TIMED_OUT:  # a timed-out session is dropped, not reused
+                await self._drop_session_locked(spec.session_id)
+
+        return result
+
+    async def shutdown(self):
+        async with self._lock:
+            session_ids = list(self._sessions.keys())  # snapshot of the ids taken before any drop call
+            for session_id in session_ids:
+                session = self._sessions.get(session_id)
+                if session is not None and session.info.persistent:
+                    continue  # persistent sessions are left in place on shutdown
+                await self._drop_session_locked(session_id)
+
+    async def create_session(self, spec: BoxSpec) -> dict:
+        session = await self._get_or_create_session(spec)  # an existing session for this session_id may be returned
+        return self._session_to_dict(session.info)
+
+    async def delete_session(self, session_id: str) -> None:
+        async with self._lock:
+            if session_id not in self._sessions:  # unknown ids raise instead of being ignored
+                raise BoxSessionNotFoundError(f"session {session_id} not found")
+            await self._drop_session_locked(session_id)
+
+    async def start_managed_process(
+        self, session_id: str, spec: BoxManagedProcessSpec
+    ) -> dict:
+        """Start (or restart) a managed process in an existing session and return its info dict."""
+        async with self._lock:
+            runtime_session = self._sessions.get(session_id)
+            if runtime_session is None:
+                raise BoxSessionNotFoundError(f"session {session_id} not found")
+
+        async with runtime_session.lock:
+            process_id = spec.process_id
+            existing = runtime_session.managed_processes.get(process_id)
+            if existing is not None and existing.is_running:
+                # Terminate the stale process before starting a new one.
+                # This happens when LangBot restarts while the Box runtime
+                # keeps the persistent session alive.
+                self.logger.info(
+                    f"LangBot Box terminating stale managed process before restart: "
+                    f"session_id={session_id} process_id={process_id}"
+                )
+                await self._terminate_managed_process(existing)
+                del runtime_session.managed_processes[process_id]
+
+            backend = await self._get_backend()
+            process = await backend.start_managed_process(runtime_session.info, spec)
+            managed_process = _ManagedProcess(
+                spec=spec,
+                process=process,
+                started_at=dt.datetime.now(_UTC),
+                attach_lock=asyncio.Lock(),
+                stderr_chunks=collections.deque(),
+            )
+            runtime_session.managed_processes[process_id] = managed_process
+            runtime_session.info.last_used_at = dt.datetime.now(_UTC)
+            # The event loop keeps only weak references to tasks, so hold strong
+            # ones (in a lazily created set) until the stderr drain and the exit
+            # watcher finish; otherwise they may be garbage-collected mid-run.
+            tasks = self.__dict__.setdefault("_background_tasks", set())
+            ident = (runtime_session.info.session_id, process_id, managed_process)
+            for job in (self._drain_managed_process_stderr, self._watch_managed_process):
+                task = asyncio.create_task(job(*ident))
+                tasks.add(task)
+                task.add_done_callback(tasks.discard)
+            return self._managed_process_to_dict(
+                runtime_session.info.session_id, process_id, managed_process
+            )
+
+    def get_managed_process(self, session_id: str, process_id: str = "default") -> dict:
+        runtime_session = self._sessions.get(session_id)
+        if runtime_session is None:
+            raise BoxSessionNotFoundError(f"session {session_id} not found")
+        managed_process = runtime_session.managed_processes.get(process_id)
+        if managed_process is None:  # the session exists but holds no process under this id
+            raise BoxManagedProcessNotFoundError(
+                f"session {session_id} has no managed process with process_id={process_id}"
+            )
+        return self._managed_process_to_dict(session_id, process_id, managed_process)
+
+    async def stop_managed_process(
+        self, session_id: str, process_id: str = "default"
+    ) -> None:
+        runtime_session = self._sessions.get(session_id)
+        if runtime_session is None:
+            raise BoxSessionNotFoundError(f"session {session_id} not found")
+
+        async with runtime_session.lock:
+            managed_process = runtime_session.managed_processes.pop(process_id, None)  # removed from the registry before termination
+            if managed_process is None:
+                raise BoxManagedProcessNotFoundError(
+                    f"session {session_id} has no managed process with process_id={process_id}"
+                )
+            await self._terminate_managed_process(managed_process)
+            runtime_session.info.last_used_at = dt.datetime.now(_UTC)  # stopping a process also refreshes last_used_at
+            self.logger.info(
+                f"LangBot Box managed process stopped: session_id={session_id} process_id={process_id}"
+            )
+
+    # ── Observability ─────────────────────────────────────────────────
+
+    async def get_backend_info(self) -> dict:
+        if self._backend is None:  # e.g. before initialize(), or after init() cleared the selection
+            self._backend = await self._select_backend()
+        backend = self._backend
+        if backend is None:
+            return {"name": None, "available": False}
+        try:
+            available = await backend.is_available()  # availability is probed again on every call
+        except Exception:  # a failing probe is reported as unavailable rather than raised
+            available = False
+        return {"name": backend.name, "available": available}
+
+    def get_sessions(self) -> list[dict]:  # JSON-ready dump of every tracked session
+        return list(map(self._session_to_dict, (rs.info for rs in self._sessions.values())))
+
+    def get_session(self, session_id: str) -> dict:
+        """Return a session's JSON-ready info plus its managed processes (``managed_process`` mirrors the "default" one)."""
+        owner = self._sessions.get(session_id)
+        if owner is None:
+            raise BoxSessionNotFoundError(f"session {session_id} not found")
+        payload = self._session_to_dict(owner.info)
+        if not owner.managed_processes:
+            return payload
+        described = payload["managed_processes"] = {}
+        for pid, proc in owner.managed_processes.items():
+            described[pid] = self._managed_process_to_dict(session_id, pid, proc)
+        if "default" in described:
+            payload["managed_process"] = described["default"]
+        return payload
+
+    async def get_status(self) -> dict:
+        """Summarise the runtime state.
+
+        Covers backend info, session count, running managed processes and the session TTL.
+        """
+        info = await self.get_backend_info()
+        procs = [p for s in self._sessions.values() for p in s.managed_processes.values()]
+        return {
+            "backend": info,
+            "active_sessions": len(self._sessions),
+            "managed_processes": len([p for p in procs if p.is_running]),
+            "session_ttl_sec": self.session_ttl_sec,
+        }
+
+    async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession:  # reuse a live, compatible session or start one
+        async with self._lock:  # held across the whole reap/lookup/create sequence
+            await self._reap_expired_sessions_locked()  # evict idle sessions before looking one up
+
+            existing = self._sessions.get(spec.session_id)
+            if existing is not None:
+                self._assert_session_compatible(existing.info, spec)  # raises BoxSessionConflictError on a mismatch
+                backend = await self._get_backend()
+                if not await backend.is_session_alive(existing.info):
+                    self.logger.warning(
+                        "LangBot Box session backend disappeared, recreating: "
+                        f"session_id={spec.session_id} "
+                        f"backend_session_id={existing.info.backend_session_id} "
+                        f"backend={existing.info.backend_name}"
+                    )
+                    await self._drop_session_locked(spec.session_id)
+                    existing = None  # fall through to the creation path below
+
+            if existing is not None:
+                existing.info.last_used_at = dt.datetime.now(_UTC)  # reuse counts as activity for TTL reaping
+                self.logger.info(
+                    "LangBot Box session reused: "
+                    f"session_id={spec.session_id} "
+                    f"backend_session_id={existing.info.backend_session_id} "
+                    f"backend={existing.info.backend_name}"
+                )
+                return existing
+
+            backend = await self._get_backend()  # raises BoxBackendUnavailableError when no backend is ready
+            info = await backend.start_session(spec)
+            runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock())  # each session gets its own lock
+            self._sessions[spec.session_id] = runtime_session
+            self.logger.info(
+                "LangBot Box session created: "
+                f"session_id={spec.session_id} "
+                f"backend_session_id={info.backend_session_id} "
+                f"backend={info.backend_name} "
+                f"image={info.image} "
+                f"network={info.network.value} "
+                f"host_path={info.host_path} "
+                f"host_path_mode={info.host_path_mode.value} "
+                f"mount_path={info.mount_path} "
+                f"workspace_quota_mb={info.workspace_quota_mb}"
+            )
+            return runtime_session
+
+    async def _get_backend(self) -> BaseSandboxBackend:
+        """Return the selected backend, selecting it on first use; raises BoxBackendUnavailableError if none is ready."""
+        backend = self._backend
+        if backend is None:
+            backend = self._backend = await self._select_backend()
+        if backend is not None:
+            return backend
+        raise BoxBackendUnavailableError("LangBot Box backend unavailable. Install and start Docker or nsjail before using exec.")
+
+    # Backend names that the top-level box.backend value 'local' expands to.
+    # _select_backend probes the matching entries in self.backends order and keeps the first available one.
+    _LOCAL_BACKEND_NAMES = ("docker", "nsjail")
+
+    async def _select_backend(self) -> BaseSandboxBackend | None:  # None when no usable backend is found
+        # Pick a backend from box.backend only, initialize it and return it.
+        # Accepted values: 'local', 'docker', 'nsjail', 'e2b'. 'local' fans out to the local container
+        # backends; any other non-empty value must match one backend exactly; empty probes every backend.
+        forced = (self._box_config.get("backend") or "").strip()
+        source_label = "box.backend"  # only used in log messages
+
+        candidates: list[BaseSandboxBackend]
+        if forced == "local":
+            candidates = [
+                b
+                for b in self.backends
+                if b is not None and b.name in self._LOCAL_BACKEND_NAMES
+            ]
+            if not candidates:
+                self.logger.error(
+                    f"LangBot Box: no local backend registered "
+                    f"({source_label}={forced})"
+                )
+                return None
+        elif forced:  # an explicit backend name: exact match required
+            candidates = [
+                b for b in self.backends if b is not None and b.name == forced
+            ]
+            if not candidates:
+                available_names = [b.name for b in self.backends if b is not None]
+                self.logger.error(
+                    f'LangBot Box backend "{forced}" not found '
+                    f"({source_label}={forced}, available: {available_names})"
+                )
+                return None
+        else:  # nothing configured: every registered backend is a candidate
+            candidates = [b for b in self.backends if b is not None]
+
+        for backend in candidates:  # first candidate that initializes and reports available wins
+            try:
+                await backend.initialize()
+                if await backend.is_available():
+                    label = (
+                        f"{backend.name} (forced via {source_label}={forced})"
+                        if forced
+                        else backend.name
+                    )
+                    self.logger.info(f"LangBot Box using backend: {label}")
+                    return backend
+            except Exception as exc:  # a failing probe only disqualifies this candidate
+                self.logger.warning(
+                    f"LangBot Box backend {backend.name} probe failed: {exc}"
+                )
+
+        if forced:  # the configured choice existed but none of its candidates was usable
+            self.logger.error(
+                f'LangBot Box backend "{forced}" probed but not available '
+                f"({source_label}={forced})"
+            )
+
+        self.logger.warning(
+            "LangBot Box backend unavailable: no supported backend (Docker, nsjail, E2B) is ready"
+        )
+        return None
+
+    async def _reap_expired_sessions_locked(self):
+        """Drop non-persistent sessions idle past the TTL that have no running managed process."""
+        ttl = self.session_ttl_sec
+        if ttl <= 0:
+            return
+        cutoff = dt.datetime.now(_UTC) - dt.timedelta(seconds=ttl)
+        stale = []
+        for sid, owner in self._sessions.items():
+            if owner.info.persistent or owner.info.last_used_at >= cutoff:
+                continue
+            if not any(proc.is_running for proc in owner.managed_processes.values()):
+                stale.append(sid)
+        # Collected first: _drop_session_locked removes entries from self._sessions.
+        for sid in stale:
+            await self._drop_session_locked(sid)
+
+    async def _drop_session_locked(self, session_id: str):
+        """Forget a session, stop its managed processes, then stop it on the backend; failures are logged, not raised."""
+        runtime_session = self._sessions.pop(session_id, None)
+        if runtime_session is None:
+            return
+        for process_id, mp in runtime_session.managed_processes.items():
+            try:
+                await self._terminate_managed_process(mp)
+            except Exception as exc:  # keep going: remaining processes and the backend session still need cleanup
+                self.logger.warning(f"Failed to stop managed process {session_id}/{process_id}: {exc}")
+        if self._backend is None:  # processes were stopped above even though there is no backend to call
+            return
+        info = runtime_session.info
+        self.logger.info(f"LangBot Box session cleanup: session_id={session_id} backend_session_id={info.backend_session_id} backend={info.backend_name}")
+        try:
+            await self._backend.stop_session(info)
+        except Exception as exc:
+            self.logger.warning(f"Failed to clean up box session {session_id}: {exc}")
+
+    def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec):
+        """Raise BoxSessionConflictError on the first checked field where ``spec`` differs from the live session."""
+        checked = (
+            "network",
+            "image",
+            "host_path",
+            "host_path_mode",
+            "mount_path",
+            "persistent",
+            "cpus",
+            "memory_mb",
+            "pids_limit",
+            "read_only_rootfs",
+            "workspace_quota_mb",
+        )
+        for field in checked:
+            current = getattr(session, field)
+            wanted = getattr(spec, field)
+            if current != wanted:
+                # The message shows the session's current value; objects with .value are shown by it.
+                shown = getattr(current, "value", current)
+                raise BoxSessionConflictError(
+                    f"Box session {spec.session_id} already exists with {field}={shown}"
+                )
+
+    async def _drain_managed_process_stderr(
+        self, session_id: str, process_id: str, managed_process: _ManagedProcess
+    ) -> None:
+        """Pump the process's stderr into a bounded line buffer and the log until EOF.
+
+        Blank lines are skipped; the oldest lines are evicted once the buffered size
+        (each line counted with one separator) exceeds the preview limit.
+        """
+        stream = managed_process.process.stderr
+        if stream is None:
+            return
+
+        try:
+            buffered = managed_process.stderr_chunks
+            while raw := await stream.readline():
+                line = raw.decode("utf-8", errors="replace").rstrip()
+                if not line:
+                    continue
+                buffered.append(line)
+                managed_process.stderr_total_len += len(line) + 1  # +1 for '\n' separator
+                while (
+                    buffered
+                    and managed_process.stderr_total_len > _MANAGED_PROCESS_STDERR_PREVIEW_LIMIT
+                ):
+                    evicted = buffered.popleft()
+                    managed_process.stderr_total_len -= len(evicted) + 1
+                self.logger.info(
+                    f"LangBot Box managed process stderr: session_id={session_id} process_id={process_id} {line}"
+                )
+        except Exception as exc:
+            self.logger.warning(
+                f"Failed to drain managed process stderr for {session_id}/{process_id}: {exc}"
+            )
+
+    async def _watch_managed_process(
+        self, session_id: str, process_id: str, managed_process: _ManagedProcess
+    ) -> None:
+        """Wait for the process to exit, record exit code/time and bump the session's last-used time."""
+        managed_process.exit_code = code = await managed_process.process.wait()
+        managed_process.exited_at = finished = dt.datetime.now(_UTC)
+        owner = self._sessions.get(session_id)
+        if owner is not None:
+            owner.info.last_used_at = finished
+        self.logger.info(
+            f"LangBot Box managed process exited: session_id={session_id} process_id={process_id} return_code={code}"
+        )
+
+    async def _terminate_managed_process(
+        self, managed_process: _ManagedProcess
+    ) -> None:  # close stdin, terminate(), wait up to 5 s, then kill(); always records exit code/time
+        if not managed_process.is_running:  # nothing to do for a process that is not running
+            return
+
+        process = managed_process.process
+        try:
+            if process.stdin is not None:
+                process.stdin.close()
+        except Exception:
+            pass  # best effort: a failure to close stdin must not prevent termination
+
+        try:
+            if process.returncode is None:
+                try:
+                    process.terminate()
+                except ProcessLookupError:
+                    pass  # the process is already gone
+            await asyncio.wait_for(asyncio.shield(process.wait()), timeout=5)  # shield: the timeout must not cancel wait()
+        except asyncio.TimeoutError:
+            if process.returncode is None:  # still alive after the 5 s grace period: escalate
+                try:
+                    process.kill()
+                except ProcessLookupError:
+                    pass  # exited between the check and kill()
+            await process.wait()
+        finally:
+            managed_process.exit_code = process.returncode  # recorded on every path, including errors
+            managed_process.exited_at = dt.datetime.now(_UTC)
+
+    def _managed_process_to_dict(
+        self, session_id: str, process_id: str, managed_process: _ManagedProcess
+    ) -> dict:
+        """Build the JSON-mode ``BoxManagedProcessInfo`` dump for one managed process (env values are not included, only keys)."""
+        if managed_process.is_running:
+            status = BoxManagedProcessStatus.RUNNING
+        else:
+            status = BoxManagedProcessStatus.EXITED
+        info = BoxManagedProcessInfo(
+            session_id=session_id,
+            process_id=process_id,
+            status=status,
+            command=managed_process.spec.command,
+            args=managed_process.spec.args,
+            cwd=managed_process.spec.cwd,
+            env_keys=sorted(managed_process.spec.env.keys()),
+            attached=managed_process.attach_lock.locked(),
+            started_at=managed_process.started_at,
+            exited_at=managed_process.exited_at,
+            exit_code=managed_process.exit_code,
+            stderr_preview="\n".join(managed_process.stderr_chunks),
+        )
+        return info.model_dump(mode="json")
+
+    @staticmethod
+    def _session_to_dict(info: BoxSessionInfo) -> dict:  # JSON-mode dump of the session info model
+        return info.model_dump(mode="json")
diff --git a/src/langbot_plugin/box/security.py b/src/langbot_plugin/box/security.py
new file mode 100644
index 0000000..7b3b98e
--- /dev/null
+++ b/src/langbot_plugin/box/security.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+import sys
+
+from .errors import BoxValidationError
+from .models import BoxSpec
+
+_BLOCKED_HOST_PATHS_POSIX = frozenset(  # POSIX host locations rejected as a sandbox host_path (including anything below them)
+    {
+        '/etc',
+        '/proc',
+        '/sys',
+        '/dev',
+        '/root',
+        '/boot',
+        '/run',
+        '/var/run',
+        '/run/docker.sock',
+        '/var/run/docker.sock',
+    }
+)
+
+_BLOCKED_HOST_PATHS_WINDOWS = frozenset(  # extra locations that are only added on Windows
+    {
+        r'C:\Windows',
+        r'C:\Program Files',
+        r'C:\Program Files (x86)',
+        r'C:\ProgramData',
+        r'\\.\pipe\docker_engine',
+    }
+)
+
+BLOCKED_HOST_PATHS = (  # effective set: POSIX entries, plus the Windows ones when sys.platform == 'win32'
+    _BLOCKED_HOST_PATHS_POSIX | _BLOCKED_HOST_PATHS_WINDOWS
+    if sys.platform == 'win32'
+    else _BLOCKED_HOST_PATHS_POSIX
+)
+
+
+def validate_sandbox_security(spec: BoxSpec) -> None:
+    """Validate that a BoxSpec does not request dangerous container config.
+
+    The symlink-resolved ``host_path`` is compared with every blocked path AND its resolved form, so a
+    blocked directory that is itself a symlink (e.g. macOS ``/etc`` -> ``/private/etc``) is still caught.
+    Raises BoxValidationError when host_path is a blocked path or lies below one.
+    """
+    if spec.host_path:
+        real = os.path.normcase(os.path.realpath(spec.host_path))
+        banned = {os.path.normcase(p) for b in BLOCKED_HOST_PATHS for p in (b, os.path.realpath(b))}
+        if any(real == b or real.startswith(b + os.sep) for b in banned):
+            raise BoxValidationError(f'host_path {spec.host_path} is blocked for security')
diff --git a/src/langbot_plugin/box/server.py b/src/langbot_plugin/box/server.py
new file mode 100644
index 0000000..601b028
--- /dev/null
+++ b/src/langbot_plugin/box/server.py
@@ -0,0 +1,494 @@
+"""Box Runtime service exposing BoxRuntime via action RPC.
+
+This module is the implementation of the `box` CLI subcommand. The only
+supported entry point is the `lbp` CLI, which mirrors the plugin runtime's
+`rt` subcommand:
+
+    lbp box        # WebSocket control transport (default)
+    lbp box -s     # stdio control transport
+
+`main()` is invoked by the CLI with the parsed argument namespace, exactly
+as `lbp rt` drives ``langbot_plugin.runtime.app.main``. There is no
+``python -m langbot_plugin.box`` / ``python -m langbot_plugin.box.server``
+launch path.
+
+All WebSocket endpoints share a single port (default 5410):
+    /rpc/ws                                                      — Action RPC (control channel)
+    /v1/sessions/{session_id}/managed-process/{process_id}/ws    — Managed process stdio relay
+    /v1/sessions/{session_id}/managed-process/ws                 — Legacy (process_id defaults to 'default')
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import datetime as dt
+import logging
+import sys
+from typing import Any
+
+import pydantic
+from aiohttp import web
+
+from langbot_plugin.entities.io.actions.enums import CommonAction
+from langbot_plugin.entities.io.errors import ConnectionClosedError
+from langbot_plugin.entities.io.resp import ActionResponse
+from langbot_plugin.runtime.io.connection import Connection
+from langbot_plugin.runtime.io.handler import Handler
+from langbot_plugin.utils.log import configure_process_logging
+
+from .actions import LangBotToBoxAction
+from .errors import (
+    BoxManagedProcessConflictError,
+    BoxManagedProcessNotFoundError,
+    BoxSessionNotFoundError,
+)
+from .models import BoxExecutionResult, BoxManagedProcessSpec, BoxSpec
+from .runtime import BoxRuntime
+
+logger = logging.getLogger("langbot.box.server")
+
+
+def _result_to_dict(result: BoxExecutionResult) -> dict:  # JSON-mode dump used as the EXEC action payload
+    return result.model_dump(mode="json")
+
+
+# ── aiohttp WebSocket → Connection adapter ───────────────────────────
+
+
+class AiohttpWSConnection(Connection):
+    """SDK ``Connection`` backed by an aiohttp ``WebSocketResponse``.
+
+    Lets ``BoxServerHandler`` (a ``Handler``) talk over an aiohttp WebSocket with
+    no changes to the handler/IO layer. Sends are serialised by a lock.
+    """
+
+    def __init__(self, ws: web.WebSocketResponse) -> None:
+        self._ws = ws
+        self._send_lock = asyncio.Lock()
+
+    async def send(self, message: str) -> None:
+        """Send one text frame; a reset socket surfaces as ConnectionClosedError."""
+        async with self._send_lock:
+            try:
+                await self._ws.send_str(message)
+            except ConnectionResetError:
+                raise ConnectionClosedError("Connection closed during send")
+
+    async def receive(self) -> str:
+        """Return the next text frame; any other frame raises ConnectionClosedError."""
+        frame = await self._ws.receive()
+        types = web.WSMsgType
+        kind = frame.type
+        if kind == types.TEXT:
+            return frame.data
+        if kind in (types.CLOSE, types.CLOSING, types.CLOSED, types.ERROR):
+            raise ConnectionClosedError("Connection closed")
+        raise ConnectionClosedError(f"Unexpected message type: {kind}")
+
+    async def close(self) -> None:
+        """Close the underlying WebSocket."""
+        await self._ws.close()
+
+
+# ── BoxServerHandler ─────────────────────────────────────────────────
+
+
+class BoxServerHandler(Handler):
+    """Server-side handler that registers box actions backed by BoxRuntime (and its skill store)."""
+
+    name = "BoxServerHandler"
+
+    def __init__(self, connection: Connection, runtime: BoxRuntime):
+        super().__init__(connection)
+        self._runtime = runtime
+        self._register_actions()  # every action is wired up at construction time
+
+    def _register_actions(self) -> None:  # one closure per action; each receives the request dict and returns an ActionResponse
+        @self.action(CommonAction.PING)
+        async def ping(data: dict[str, Any]) -> ActionResponse:
+            return ActionResponse.success({})
+
+        @self.action(LangBotToBoxAction.HEALTH)
+        async def health(data: dict[str, Any]) -> ActionResponse:
+            info = await self._runtime.get_backend_info()
+            return ActionResponse.success(info)
+
+        @self.action(LangBotToBoxAction.STATUS)
+        async def status(data: dict[str, Any]) -> ActionResponse:
+            result = await self._runtime.get_status()
+            return ActionResponse.success(result)
+
+        @self.action(LangBotToBoxAction.EXEC)
+        async def exec_cmd(data: dict[str, Any]) -> ActionResponse:
+            try:
+                spec = BoxSpec.model_validate(data)
+            except pydantic.ValidationError as exc:  # malformed payload becomes an error response
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            result = await self._runtime.execute(spec)
+            return ActionResponse.success(_result_to_dict(result))
+
+        @self.action(LangBotToBoxAction.CREATE_SESSION)
+        async def create_session(data: dict[str, Any]) -> ActionResponse:
+            try:
+                spec = BoxSpec.model_validate(data)
+            except pydantic.ValidationError as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            info = await self._runtime.create_session(spec)
+            return ActionResponse.success(info)
+
+        @self.action(LangBotToBoxAction.GET_SESSION)
+        async def get_session(data: dict[str, Any]) -> ActionResponse:
+            return ActionResponse.success(self._runtime.get_session(data["session_id"]))
+
+        @self.action(LangBotToBoxAction.GET_SESSIONS)
+        async def get_sessions(data: dict[str, Any]) -> ActionResponse:
+            return ActionResponse.success({"sessions": self._runtime.get_sessions()})
+
+        @self.action(LangBotToBoxAction.DELETE_SESSION)
+        async def delete_session(data: dict[str, Any]) -> ActionResponse:
+            await self._runtime.delete_session(data["session_id"])
+            return ActionResponse.success({"deleted": data["session_id"]})
+
+        @self.action(LangBotToBoxAction.START_MANAGED_PROCESS)
+        async def start_managed_process(data: dict[str, Any]) -> ActionResponse:
+            session_id = data["session_id"]
+            try:
+                spec = BoxManagedProcessSpec.model_validate(data["spec"])
+            except pydantic.ValidationError as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            info = await self._runtime.start_managed_process(session_id, spec)
+            return ActionResponse.success(info)
+
+        @self.action(LangBotToBoxAction.GET_MANAGED_PROCESS)
+        async def get_managed_process(data: dict[str, Any]) -> ActionResponse:
+            return ActionResponse.success(
+                self._runtime.get_managed_process(
+                    data["session_id"],
+                    data.get("process_id", "default"),  # process_id is optional in the request
+                )
+            )
+
+        @self.action(LangBotToBoxAction.STOP_MANAGED_PROCESS)
+        async def stop_managed_process(data: dict[str, Any]) -> ActionResponse:
+            await self._runtime.stop_managed_process(
+                data["session_id"], data.get("process_id", "default")
+            )
+            return ActionResponse.success(
+                {"stopped": data.get("process_id", "default")}
+            )
+
+        @self.action(LangBotToBoxAction.GET_BACKEND_INFO)
+        async def get_backend_info(data: dict[str, Any]) -> ActionResponse:  # same payload as the HEALTH action
+            info = await self._runtime.get_backend_info()
+            return ActionResponse.success(info)
+
+        @self.action(LangBotToBoxAction.LIST_SKILLS)
+        async def list_skills(data: dict[str, Any]) -> ActionResponse:
+            return ActionResponse.success(
+                {"skills": self._runtime.skill_store.list_skills()}
+            )
+
+        @self.action(LangBotToBoxAction.GET_SKILL)
+        async def get_skill(data: dict[str, Any]) -> ActionResponse:
+            skill = self._runtime.skill_store.get_skill(data["name"])
+            return ActionResponse.success({"skill": skill})
+
+        @self.action(LangBotToBoxAction.CREATE_SKILL)
+        async def create_skill(data: dict[str, Any]) -> ActionResponse:
+            try:
+                skill = self._runtime.skill_store.create_skill(data["skill"])
+            except Exception as exc:  # any store failure is reported under the BoxValidationError label
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success({"skill": skill})
+
+        @self.action(LangBotToBoxAction.UPDATE_SKILL)
+        async def update_skill(data: dict[str, Any]) -> ActionResponse:
+            try:
+                skill = self._runtime.skill_store.update_skill(
+                    data["name"], data["skill"]
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success({"skill": skill})
+
+        @self.action(LangBotToBoxAction.DELETE_SKILL)
+        async def delete_skill(data: dict[str, Any]) -> ActionResponse:
+            try:
+                result = self._runtime.skill_store.delete_skill(data["name"])
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success(result)
+
+        @self.action(LangBotToBoxAction.SCAN_SKILL_DIRECTORY)
+        async def scan_skill_directory(data: dict[str, Any]) -> ActionResponse:
+            try:
+                skill = self._runtime.skill_store.scan_directory(data["path"])
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success(skill)
+
+        @self.action(LangBotToBoxAction.LIST_SKILL_FILES)
+        async def list_skill_files(data: dict[str, Any]) -> ActionResponse:
+            try:
+                result = self._runtime.skill_store.list_skill_files(
+                    data["name"],
+                    data.get("path", "."),
+                    include_hidden=bool(data.get("include_hidden", False)),
+                    max_entries=int(data.get("max_entries", 200)),  # a non-numeric value lands in the except below
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success(result)
+
+        @self.action(LangBotToBoxAction.READ_SKILL_FILE)
+        async def read_skill_file(data: dict[str, Any]) -> ActionResponse:
+            try:
+                result = self._runtime.skill_store.read_skill_file(
+                    data["name"], data["path"]
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success(result)
+
+        @self.action(LangBotToBoxAction.WRITE_SKILL_FILE)
+        async def write_skill_file(data: dict[str, Any]) -> ActionResponse:
+            try:
+                result = self._runtime.skill_store.write_skill_file(
+                    data["name"], data["path"], data.get("content", "")
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success(result)
+
+        @self.action(LangBotToBoxAction.PREVIEW_SKILL_ZIP)
+        async def preview_skill_zip(data: dict[str, Any]) -> ActionResponse:
+            try:
+                file_bytes = await self.read_local_file(data["file_key"])
+                await self.delete_local_file(data["file_key"])  # the uploaded file is removed as soon as it is read
+                result = self._runtime.skill_store.preview_zip_upload(
+                    file_bytes=file_bytes,
+                    filename=data.get("filename", "skill.zip"),
+                    source_subdir=data.get("source_subdir") or "",
+                    target_suffix=data.get("target_suffix", "upload"),
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success({"skills": result})
+
+        @self.action(LangBotToBoxAction.INSTALL_SKILL_ZIP)
+        async def install_skill_zip(data: dict[str, Any]) -> ActionResponse:
+            try:
+                file_bytes = await self.read_local_file(data["file_key"])
+                await self.delete_local_file(data["file_key"])  # the uploaded file is removed as soon as it is read
+                result = self._runtime.skill_store.install_zip_upload(
+                    file_bytes=file_bytes,
+                    filename=data.get("filename", "skill.zip"),
+                    source_paths=data.get("source_paths") or [],
+                    source_path=data.get("source_path") or "",
+                    source_subdir=data.get("source_subdir") or "",
+                    target_suffix=data.get("target_suffix", "upload"),
+                )
+            except Exception as exc:
+                return ActionResponse.error(f"BoxValidationError: {exc}")
+            return ActionResponse.success({"skills": result})
+
+        @self.action(LangBotToBoxAction.INIT)
+        async def init(data: dict[str, Any]) -> ActionResponse:
+            self._runtime.init(data)  # the whole request payload is handed to the runtime as its config
+            return ActionResponse.success({"initialized": True})
+
+        @self.action(LangBotToBoxAction.SHUTDOWN)
+        async def shutdown(data: dict[str, Any]) -> ActionResponse:
+            await self._runtime.shutdown()
+            return ActionResponse.success({})
+
+
+# ── Managed process WebSocket relay ──────────────────────────────────
+
+
+def _error_response(exc: Exception) -> web.Response:
+    """Wrap an exception as an HTTP 400 JSON error whose code is the exception class name."""
+    error = {"code": type(exc).__name__, "message": str(exc)}
+    body = {"error": error}
+    return web.json_response(body, status=400)
+
+
+async def handle_managed_process_ws(request: web.Request) -> web.StreamResponse:
+    """Relay one managed process's stdio over a WebSocket, one text frame per line.
+
+    Unknown session/process or a stopped process yields an HTTP 400 JSON error. One client
+    attaches at a time (``attach_lock``). Both pump tasks are cancelled and awaited before the
+    lock is released, even if this handler is cancelled, so no stale reader keeps eating stdout.
+    """
+    runtime: BoxRuntime = request.app["runtime"]
+    session_id = request.match_info["session_id"]
+    process_id = request.match_info.get("process_id", "default")
+
+    runtime_session = runtime._sessions.get(session_id)
+    if runtime_session is None:
+        return _error_response(BoxSessionNotFoundError(f"session {session_id} not found"))
+
+    managed_process = runtime_session.managed_processes.get(process_id)
+    if managed_process is None:
+        return _error_response(
+            BoxManagedProcessNotFoundError(
+                f"session {session_id} has no managed process with process_id={process_id}"
+            )
+        )
+    if not managed_process.is_running:
+        return _error_response(
+            BoxManagedProcessConflictError(
+                f"managed process {process_id} in session {session_id} is not running"
+            )
+        )
+
+    ws = web.WebSocketResponse(protocols=("mcp",))
+    await ws.prepare(request)
+
+    async with managed_process.attach_lock:
+        stdout, stdin = managed_process.process.stdout, managed_process.process.stdin
+        if stdout is None or stdin is None:
+            await ws.close(message=b"managed process stdio unavailable")
+            return ws
+
+        async def _stdout_to_ws() -> None:
+            while line := await stdout.readline():
+                await ws.send_str(line.decode("utf-8", errors="replace").rstrip("\n"))
+                runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc)
+
+        async def _ws_to_stdin() -> None:
+            async for msg in ws:
+                if msg.type == web.WSMsgType.TEXT:
+                    stdin.write((msg.data + "\n").encode("utf-8"))
+                    await stdin.drain()
+                    runtime_session.info.last_used_at = dt.datetime.now(dt.timezone.utc)
+                elif msg.type in (
+                    web.WSMsgType.CLOSE,
+                    web.WSMsgType.CLOSING,
+                    web.WSMsgType.CLOSED,
+                    web.WSMsgType.ERROR,
+                ):
+                    break
+
+        tasks = [
+            asyncio.create_task(_stdout_to_ws()),
+            asyncio.create_task(_ws_to_stdin()),
+        ]
+        try:
+            done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+            for task in done:
+                task.result()  # re-raise a pump failure
+        finally:
+            # Runs on normal exit, pump failure and handler cancellation alike.
+            for task in tasks:
+                task.cancel()
+            await asyncio.gather(*tasks, return_exceptions=True)
+            await ws.close()
+
+    return ws
+
+
+# ── Action RPC WebSocket handler ─────────────────────────────────────
+
+
+async def handle_rpc_ws(request: web.Request) -> web.StreamResponse:
+    """Serve the action RPC protocol on one aiohttp WebSocket until the handler's run loop returns."""
+    runtime = request.app["runtime"]
+    socket = web.WebSocketResponse()
+    await socket.prepare(request)
+
+    # Bridge the socket into the SDK Connection interface and hand it to a
+    # box handler that is backed by the app-wide runtime.
+    rpc = BoxServerHandler(AiohttpWSConnection(socket), runtime)
+    await rpc.run()
+
+    return socket
+
+
+# ── App factory ──────────────────────────────────────────────────────
+
+
+def create_app(runtime: BoxRuntime) -> web.Application:
+    """Build the single-port aiohttp app: action RPC plus the managed-process relay routes."""
+    app = web.Application()
+    app["runtime"] = runtime
+
+    routes = (
+        ("/rpc/ws", handle_rpc_ws),
+        ("/v1/sessions/{session_id}/managed-process/{process_id}/ws", handle_managed_process_ws),
+        # Backward-compatible route (defaults to process_id='default')
+        ("/v1/sessions/{session_id}/managed-process/ws", handle_managed_process_ws),
+    )
+    for path, handler in routes:
+        app.router.add_get(path, handler)
+    return app
+
+
+def create_ws_relay_app(runtime: BoxRuntime) -> web.Application:
+    """Backward-compatible alias of ``create_app`` kept for older callers.
+
+    The relay and action RPC endpoints now live in one aiohttp app, so this returns that same app.
+    """
+    return create_app(runtime)
+
+
+# ── Entry point ──────────────────────────────────────────────────────
+
+
+async def _run_server(host: str, port: int, mode: str) -> None:
+    """Start the Box runtime and serve it until cancelled or the stdio control channel ends.
+
+    The aiohttp app (managed-process relay plus ``/rpc/ws``) is bound to ``host:port``. In stdio
+    mode a bind failure only disables the relay; in ws mode it is re-raised, because ``/rpc/ws``
+    is the only control channel and the process would otherwise idle forever while unreachable.
+    """
+    runtime = BoxRuntime(logger=logger)
+    await runtime.initialize()
+
+    runner: web.AppRunner | None = None
+    try:
+        try:
+            runner = web.AppRunner(create_app(runtime))
+            await runner.setup()
+            await web.TCPSite(runner, host, port).start()
+            logger.info(f"Box server listening on {host}:{port}")
+        except OSError as exc:
+            logger.warning(f"Box server failed to bind {host}:{port}: {exc}")
+            if mode != "stdio":
+                raise
+            logger.warning("Managed process WebSocket attach will be unavailable.")
+
+        if mode == "stdio":
+            from langbot_plugin.runtime.io.controllers.stdio.server import (
+                StdioServerController,
+            )
+
+            async def new_connection_callback(connection: Connection) -> None:
+                await BoxServerHandler(connection, runtime).run()
+
+            await StdioServerController().run(new_connection_callback)
+        else:
+            # Action RPC is served by aiohttp on /rpc/ws; park here until the task is cancelled.
+            logger.info(f"Box action RPC available at ws://{host}:{port}/rpc/ws")
+            await asyncio.Event().wait()
+    finally:
+        await runtime.shutdown()
+        if runner is not None:
+            await runner.cleanup()
+
+
+def main(args: argparse.Namespace) -> None:
+    """Entry point of the `box` CLI subcommand: start the Box runtime service.
+
+    ``args`` is the namespace parsed by the `box` subparser; this function reads
+    ``stdio_control``, ``host`` and ``ws_control_port`` from it.
+    """
+    # Same convention as `lbp rt`: WebSocket control unless -s/--stdio-control is given.
+    if args.stdio_control:
+        control_mode = "stdio"
+    else:
+        control_mode = "ws"
+
+    configure_process_logging(stream=sys.stderr)
+    asyncio.run(_run_server(args.host, args.ws_control_port, control_mode))
diff --git a/src/langbot_plugin/box/skill_store.py b/src/langbot_plugin/box/skill_store.py
new file mode 100644
index 0000000..ad8eb15
--- /dev/null
+++ b/src/langbot_plugin/box/skill_store.py
@@ -0,0 +1,647 @@
+from __future__ import annotations
+
+import datetime as dt
+import io
+import os
+import posixpath
+import shutil
+import tempfile
+import zipfile
+from pathlib import Path
+from typing import Optional
+
+import yaml
+
+
+_FRONTMATTER_FIELDS = (
+    'name',
+    'display_name',
+    'description',
+)
+
+_PUBLIC_SKILL_FIELDS = (
+    'name',
+    'display_name',
+    'description',
+    'instructions',
+    'package_root',
+    'entry_file',
+    'created_at',
+    'updated_at',
+)
+
+
+def parse_frontmatter(content: str) -> tuple[dict, str]:
+    """Split a ``---`` delimited YAML header from the text that follows it.
+
+    Returns ``(metadata, body)``. Without a complete header the content is
+    returned unchanged; a header that is not a YAML mapping yields ``{}``.
+    """
+    raw_lines = content.splitlines(keepends=True)
+    # The opening fence must be the entire first line, not merely a prefix.
+    if not content.startswith('---') or not raw_lines or raw_lines[0].strip() != '---':
+        return {}, content
+
+    closing = next((i for i in range(1, len(raw_lines)) if raw_lines[i].strip() == '---'), None)
+    if closing is None:
+        return {}, content
+
+    parsed = yaml.safe_load(''.join(raw_lines[1:closing])) or {}
+    body = ''.join(raw_lines[closing + 1 :]).lstrip('\n')
+    return (parsed if isinstance(parsed, dict) else {}), body
+
+
+def build_skill_md(metadata: dict, instructions: str) -> str:
+    """Render SKILL.md text: optional YAML frontmatter followed by instructions."""
+
+    def _keep(value) -> bool:
+        # None and whitespace-only strings are left out of the frontmatter.
+        return value is not None and not (isinstance(value, str) and not value.strip())
+
+    header = {key: metadata.get(key) for key in _FRONTMATTER_FIELDS if _keep(metadata.get(key))}
+    if not header:
+        # Nothing to emit: the file is just the instructions.
+        return instructions
+
+    rendered = yaml.dump(header, default_flow_style=False, allow_unicode=True, sort_keys=False)
+    rendered = rendered.strip()
+    return f'---\n{rendered}\n---\n\n{instructions}'
+
+
+class BoxSkillStore:
+    """Skill package storage owned by the Box runtime process."""
+
+    def __init__(self, config: dict | None = None):
+        self._config = config if config else {}
+
+    def update_config(self, config: dict) -> None:
+        self._config = config if config else {}
+
+    @property
+    def root(self) -> str:
+        """Absolute, resolved path of the directory that holds managed skills.
+
+        ``local.host_root`` (default ``./data/box``) is anchored at the current
+        working directory; ``local.skills_root`` (default ``skills``) is then
+        anchored at the host root. An absolute value at either level replaces
+        its anchor, because joining onto an absolute path discards the left side.
+        """
+        settings = self._config.get('local') or {}
+        host_value = str(settings.get('host_root') or './data/box').strip()
+        skills_value = str(settings.get('skills_root') or 'skills').strip()
+
+        host_dir = (Path.cwd() / Path(host_value).expanduser()).resolve()
+        return str((host_dir / Path(skills_value).expanduser()).resolve())
+
+    def list_skills(self) -> list[dict]:
+        """Return every loadable skill under the root, most recently updated first."""
+        os.makedirs(self.root, exist_ok=True)
+        loaded: list[dict] = []
+        for location in self._discover_skill_directories(self.root, max_depth=6):
+            try:
+                loaded.append(self._load_skill_package(*location))
+            except Exception:  # best effort: a package that fails to load is skipped
+                continue
+        return [self._serialize_skill(s) for s in sorted(loaded, key=lambda it: it.get('updated_at', ''), reverse=True)]
+
+    def get_skill(self, skill_name: str) -> Optional[dict]:
+        """Return the listed skill whose ``name`` equals *skill_name*, else ``None``."""
+        return next(
+            (candidate for candidate in self.list_skills() if candidate.get('name') == skill_name), None
+        )
+
+    def create_skill(self, data: dict) -> dict:
+        """Create a skill, optionally importing an existing package directory.
+
+        A ``package_root`` inside the managed root is adopted in place; any other
+        directory is copied to the managed path for ``name``. Raises ValueError for
+        a bad or duplicate name, a missing directory, or a package without SKILL.md.
+        """
+        name = self._validate_skill_name(data.get('name', ''))
+        if self.get_skill(name):
+            raise ValueError(f'Skill with name "{name}" already exists')
+        package_root = self._normalize_package_root(data.get('package_root') or '')
+        target_root = managed_root = self._managed_skill_path(name)
+        imported_skill_data: dict | None = None
+        if package_root and self._managed_install_root_for_package(package_root):
+            if not os.path.isdir(package_root):
+                raise ValueError(f'Directory does not exist: {package_root}')
+            target_root = package_root
+            imported_skill_data = self._read_skill_package(target_root)
+        elif package_root and package_root != managed_root:
+            if not os.path.isdir(package_root):
+                raise ValueError(f'Directory does not exist: {package_root}')
+            if os.path.exists(managed_root):
+                raise ValueError(f'Skill directory already exists: {managed_root}')
+            # Validate before copying so a bad package cannot orphan managed_root.
+            imported_skill_data = self._read_skill_package(package_root)
+            os.makedirs(os.path.dirname(managed_root), exist_ok=True)
+            shutil.copytree(package_root, managed_root)
+        else:
+            os.makedirs(managed_root, exist_ok=True)
+        metadata = {'name': name}
+        for field in ('display_name', 'description'):
+            metadata[field] = self._resolve_create_field(data, field, imported_skill_data, default='')
+        instructions = self._resolve_create_field(data, 'instructions', imported_skill_data, default='')
+        self._write_skill_md(target_root, metadata, instructions)
+        created = self.get_skill(name)
+        if not created:
+            raise ValueError(f'Failed to create skill "{name}"')
+        return created
+
+    def update_skill(self, skill_name: str, data: dict) -> dict:
+        """Rewrite an existing skill's SKILL.md from *data* and return the reloaded skill.
+
+        Only display_name, description and instructions may change; a different
+        ``name`` or ``package_root`` raises ValueError, as does an unknown skill.
+        """
+        current = self.get_skill(skill_name)
+        if not current:
+            raise ValueError(f'Skill "{skill_name}" not found')
+
+        current_name = current['name']
+        if str(data.get('name', current_name) or current_name).strip() != current_name:
+            raise ValueError('Renaming skills is not supported')
+        wanted_root = str(data.get('package_root', '') or '').strip()
+        current_root = self._normalize_package_root(current['package_root'])
+        if wanted_root and self._normalize_package_root(wanted_root) != current_root:
+            raise ValueError('Updating package_root is not supported; recreate the skill to import a different package')
+        header = {'name': current_name}
+        for field in ('display_name', 'description'):
+            header[field] = data.get(field, current.get(field, ''))
+        body = str(data.get('instructions', current.get('instructions', '')) or '')
+        self._write_skill_md(current['package_root'], header, body)
+        refreshed = self.get_skill(skill_name)
+        if not refreshed:
+            raise ValueError(f'Skill "{skill_name}" not found after update')
+        return refreshed
+
+    def delete_skill(self, skill_name: str) -> dict:
+        """Remove a managed skill's install directory; ValueError if unknown or unmanaged."""
+        target = self.get_skill(skill_name)
+        if not target:
+            raise ValueError(f'Skill "{skill_name}" not found')
+
+        # Removes the whole top-level directory under the skills root, not just the package.
+        install_root = self._managed_install_root_for_package(self._normalize_package_root(target['package_root']))
+        if install_root:
+            shutil.rmtree(install_root, ignore_errors=True)
+            return {'deleted': skill_name}
+        raise ValueError('Only managed skills under the Box skills root can be deleted')
+
+    def scan_directory(self, path: str) -> dict:
+        """Load the only skill package at or up to two levels below *path*; ValueError otherwise."""
+        if not os.path.isdir(path):
+            raise ValueError(f'Directory does not exist: {path}')
+
+        matches = self._discover_skill_directories(path, max_depth=2)
+        if len(matches) == 1:
+            return self._load_skill_package(*matches[0])
+        if not matches:
+            raise ValueError(f'No SKILL.md found in {path} or its subdirectories (max depth: 2)')
+
+        listing = ', '.join(found_root for found_root, _ in matches)
+        raise ValueError(
+            f'Multiple skill directories found in {path}. Please choose a more specific path: {listing}'
+        )
+
+    def list_skill_files(
+        self,
+        skill_name: str,
+        path: str = '.',
+        include_hidden: bool = False,
+        max_entries: int = 200,
+    ) -> dict:
+        """List one directory of a skill package, sorted by entry name.
+
+        Returns at most ``max_entries`` entries (none when it is zero or negative);
+        ``truncated`` is True only when visible entries were actually left out.
+        """
+        skill = self._require_skill(skill_name)
+        target_dir, relative_path = self._resolve_skill_path(skill, path, expect_directory=True)
+        base_path = '.' if relative_path in ('', '.') else relative_path.replace(os.sep, '/')
+        entries: list[dict] = []
+        with os.scandir(target_dir) as iterator:
+            visible = sorted(
+                (item for item in iterator if include_hidden or not item.name.startswith('.')),
+                key=lambda item: item.name,
+            )
+            for entry in visible[: max(max_entries, 0)]:
+                is_dir = entry.is_dir()
+                try:
+                    size = None if is_dir else entry.stat().st_size
+                except OSError:  # e.g. a dangling symlink: keep the entry, size unknown
+                    size = None
+                entry_path = entry.name if base_path == '.' else f'{base_path}/{entry.name}'
+                entries.append({'path': entry_path, 'name': entry.name, 'is_dir': is_dir, 'size': size})
+
+        listing = {'skill': {'name': skill['name']}, 'base_path': base_path, 'entries': entries}
+        listing['truncated'] = len(visible) > len(entries)
+        return listing
+
+    def read_skill_file(self, skill_name: str, path: str) -> dict:
+        """Return the UTF-8 text of one file inside a skill package.
+
+        Raises ValueError if the skill or file is missing, the path leaves the
+        package, or the bytes are not valid UTF-8.
+        """
+        skill = self._require_skill(skill_name)
+        file_path, shown_path = self._resolve_skill_path(skill, path, expect_directory=False)
+        if not os.path.isfile(file_path):
+            raise ValueError(f'Skill file not found: {shown_path}')
+
+        try:
+            text = Path(file_path).read_text(encoding='utf-8')
+        except UnicodeDecodeError as exc:
+            raise ValueError(f'Skill file is not valid UTF-8 text: {shown_path}') from exc
+
+        return {'skill': {'name': skill['name']}, 'path': shown_path.replace(os.sep, '/'), 'content': text}
+
+    def write_skill_file(self, skill_name: str, path: str, content: str) -> dict:
+        """Write *content* as UTF-8 text to a file inside a skill package."""
+        skill = self._require_skill(skill_name)
+        file_path, shown_path = self._resolve_skill_path(skill, path, expect_directory=False)
+
+        # Parent directories are created on demand before the file is opened.
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+        with open(file_path, 'w', encoding='utf-8') as handle:
+            handle.write(content)
+
+        encoded_size = len(content.encode('utf-8'))
+        return {'skill': {'name': skill['name']}, 'path': shown_path.replace(os.sep, '/'), 'bytes_written': encoded_size}
+
+    def preview_zip_upload(
+        self,
+        *,
+        file_bytes: bytes,
+        filename: str,
+        source_subdir: str = '',
+        target_suffix: str = 'upload',
+    ) -> list[dict]:
+        """Describe the skills an uploaded zip would install, without installing.
+
+        The archive is unpacked into a throwaway directory that is removed
+        before returning. Raises ValueError for empty or invalid uploads.
+        """
+        if not file_bytes:
+            raise ValueError('Uploaded file is empty')
+
+        scratch = tempfile.TemporaryDirectory(prefix='langbot_box_skill_preview_', ignore_cleanup_errors=True)
+        with scratch as tmp_dir:
+            archive_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir)
+            archive_root = self._resolve_source_subdir_root(archive_root, source_subdir)
+            stem = self._uploaded_skill_target_stem(filename)
+            return self._preview_skill_candidates(archive_root, base_target_name=stem, suffix=target_suffix)
+
+    def install_zip_upload(
+        self,
+        *,
+        file_bytes: bytes,
+        filename: str,
+        source_paths: list[str] | None = None,
+        source_path: str = '',
+        source_subdir: str = '',
+        target_suffix: str = 'upload',
+    ) -> list[dict]:
+        """Install the selected skill(s) from an uploaded zip archive.
+
+        ``source_paths`` (or the single legacy ``source_path``) picks among the
+        packages found in the archive. Returns the installed skills as listed
+        by the store, falling back to the scan result when a lookup misses.
+        """
+        if not file_bytes:
+            raise ValueError('Uploaded file is empty')
+
+        scratch = tempfile.TemporaryDirectory(prefix='langbot_box_skill_upload_', ignore_cleanup_errors=True)
+        with scratch as tmp_dir:
+            archive_root = self._extract_uploaded_skill_to_temp(file_bytes, tmp_dir)
+            archive_root = self._resolve_source_subdir_root(archive_root, source_subdir)
+            stem = self._uploaded_skill_target_stem(filename)
+            candidates = self._preview_skill_candidates(archive_root, base_target_name=stem, suffix=target_suffix)
+            selection = {'source_paths': source_paths or [], 'source_path': source_path}
+            chosen = self._select_preview_candidates(candidates, selection)
+            installed = self._install_preview_candidates(archive_root, chosen)
+            # Prefer the store's own listing of each installed skill.
+            return [self.get_skill(item['name']) or self._serialize_skill(item) for item in installed]
+
+    def _require_skill(self, skill_name: str) -> dict:
+        """Like ``get_skill`` but raise ValueError when the skill is missing."""
+        if found := self.get_skill(skill_name):
+            return found
+        raise ValueError(f'Skill "{skill_name}" not found')
+
+    @staticmethod
+    def _serialize_skill(skill: dict) -> dict:
+        return {key: skill[key] for key in _PUBLIC_SKILL_FIELDS if key in skill}
+
+    def _load_skill_package(self, package_root: str, entry_file: str = 'SKILL.md') -> dict:
+        """Read a package's entry file into a skill dict (frontmatter fields plus file timestamps)."""
+        package_root = self._normalize_package_root(package_root)
+        entry_path = os.path.join(package_root, entry_file)
+        with open(entry_path, 'r', encoding='utf-8') as handle:
+            metadata, instructions = parse_frontmatter(handle.read())
+        fallback_name = os.path.basename(os.path.normpath(package_root))
+        file_stat = os.stat(entry_path)
+        # st_ctime is the creation time on Windows but the last metadata change on Unix.
+        return {
+            'name': str(metadata.get('name') or fallback_name).strip(),
+            'display_name': str(metadata.get('display_name') or metadata.get('name') or fallback_name).strip(),
+            'description': str(metadata.get('description') or '').strip(),
+            'instructions': instructions,
+            'package_root': package_root,
+            'entry_file': entry_file,
+            'created_at': dt.datetime.fromtimestamp(file_stat.st_ctime, tz=dt.timezone.utc).isoformat(),
+            'updated_at': dt.datetime.fromtimestamp(file_stat.st_mtime, tz=dt.timezone.utc).isoformat(),
+        }
+
+    def _read_skill_package(self, package_root: str) -> dict:
+        """Return the importable fields of the package at *package_root*; ValueError without SKILL.md."""
+        located = self._find_skill_entry(package_root)
+        if located is None:
+            raise ValueError(f'No SKILL.md found in {package_root}')
+
+        loaded = self._load_skill_package(*located)
+
+        # Only these four fields are carried over from an imported package;
+        # each one has a fallback for when the loaded skill lacks it.
+        fallbacks = {'entry_file': 'SKILL.md', 'display_name': '', 'description': '', 'instructions': ''}
+        return {key: loaded.get(key, fallback) for key, fallback in fallbacks.items()}
+
+    def _write_skill_md(self, package_root: str, metadata: dict, instructions: str) -> None:
+        """Create *package_root* if needed and (over)write its ``SKILL.md``."""
+        skill_dir = self._normalize_package_root(package_root)
+        os.makedirs(skill_dir, exist_ok=True)
+        rendered = build_skill_md(metadata, instructions)
+        Path(skill_dir, 'SKILL.md').write_text(rendered, encoding='utf-8')
+
+    def _managed_skill_path(self, skill_name: str) -> str:
+        return self._normalize_package_root(str(Path(self.root) / skill_name))
+
+    def _managed_install_root_for_package(self, package_root: str) -> str:
+        """Return the top-level directory under the skills root that contains *package_root*.
+
+        Returns ``''`` when the package is empty, is the root itself, or lies outside it.
+        """
+        skills_root = self._normalize_package_root(self.root)
+        candidate = self._normalize_package_root(package_root)
+        # The trailing separator keeps sibling names such as "<root>-old" out.
+        inside = bool(candidate) and candidate != skills_root and candidate.startswith(skills_root + os.sep)
+        if not inside:
+            return ''
+
+        # Keep only the first path component below the skills root.
+        first_segment = os.path.relpath(candidate, skills_root).split(os.sep, 1)[0]
+        return '' if first_segment in ('', '.', '..') else os.path.join(skills_root, first_segment)
+
+    def _build_preview_target_dir(self, base_target_name: str, source_path: str, suffix: str) -> str:
+        """Compose the install directory ``<root>/<base>[-<leaf>][-<suffix>]`` for a previewed skill."""
+        cleaned = str(source_path or '').strip().replace('\\', '/').strip('/')
+        leaf = cleaned.rsplit('/', 1)[-1]
+        # The leaf is dropped when it merely repeats the base name; empty
+        # parts are skipped so no stray hyphens are produced.
+        extras = [leaf if leaf != base_target_name else '', suffix]
+        target_name = '-'.join([base_target_name, *(part for part in extras if part)])
+        return os.path.join(self.root, target_name)
+
+    def _preview_skill_candidates(self, root_path: str, *, base_target_name: str, suffix: str) -> list[dict]:
+        """Load each package under *root_path* (max depth 2) as an install preview, sorted by source_path."""
+        locations = self._discover_skill_directories(root_path, max_depth=2)
+        if not locations:
+            raise ValueError(f'No SKILL.md found in {root_path} or its subdirectories (max depth: 2)')
+
+        def _preview(found_root: str, entry_file: str) -> dict:
+            item = self._load_skill_package(found_root, entry_file)
+            rel = os.path.relpath(found_root, root_path)
+            # The archive root itself is reported with an empty source path.
+            rel = '' if rel in ('', '.') else rel
+            item['source_path'] = rel.replace(os.sep, '/')
+            item['package_root'] = self._build_preview_target_dir(base_target_name, rel, suffix)
+            return item
+
+        ordered = sorted((_preview(*loc) for loc in locations), key=lambda item: item['source_path'])
+        return [self._serialize_skill_with_source(item) for item in ordered]
+
+    @staticmethod
+    def _serialize_skill_with_source(skill: dict) -> dict:
+        """Public fields of *skill*, plus ``source_path`` when the skill has one."""
+        public = BoxSkillStore._serialize_skill(skill)
+        extra = {'source_path': skill['source_path']} if 'source_path' in skill else {}
+        return {**public, **extra}
+
+    def _select_preview_candidates(self, previews: list[dict], data: dict) -> list[dict]:
+        """Pick previews by ``data['source_paths']`` plus the legacy ``data['source_path']``.
+
+        With no selection, a lone preview is returned as-is; otherwise ValueError
+        asks for a choice. Unknown paths also raise ValueError.
+        """
+        def _clean(value) -> str:
+            return str(value or '').strip().replace('\\', '/').strip('/')
+
+        raw_paths = data.get('source_paths', [])
+        requested = list(dict.fromkeys(_clean(p) for p in raw_paths)) if isinstance(raw_paths, list) else []
+        legacy = _clean(data.get('source_path', ''))
+        if legacy and legacy not in requested:
+            requested.append(legacy)
+
+        if not requested and len(previews) == 1:
+            return previews
+
+        choices = ', '.join(item['source_path'] or '.' for item in previews)
+        if not requested:
+            raise ValueError(f'Multiple skills found. Please choose one or more source_paths: {choices}')
+
+        by_path = {item['source_path']: item for item in previews}
+        picked: list[dict] = []
+        for wanted in requested:
+            if wanted not in by_path:
+                raise ValueError(f'Invalid source_path "{wanted}". Available: {choices}')
+            picked.append(by_path[wanted])
+
+        return picked
+
+    def _install_preview_candidates(self, root_path: str, selected_previews: list[dict]) -> list[dict]:
+        """Copy selected previews to their targets; on any failure remove what this call created."""
+        target_dirs: list[str] = []
+        for preview in selected_previews:
+            target_dir = self._normalize_package_root(preview['package_root'])
+            if target_dir in target_dirs:
+                raise ValueError(f'Duplicate target directory selected: {target_dir}')
+            if os.path.exists(target_dir):
+                raise ValueError(f'Skill directory already exists: {target_dir}')
+            target_dirs.append(target_dir)
+
+        installed_scans: list[dict] = []
+        created_dirs: list[str] = []
+        try:
+            for preview, target_dir in zip(selected_previews, target_dirs):
+                source_root = self._preview_source_root(root_path, preview['source_path'])
+                os.makedirs(os.path.dirname(target_dir), exist_ok=True)
+                # Track the target before copying so a partial copy is rolled back too.
+                created_dirs.append(target_dir)
+                shutil.copytree(source_root, target_dir)
+                installed_scans.append(self.scan_directory(target_dir))
+        except Exception:
+            for target_dir in created_dirs:
+                shutil.rmtree(target_dir, ignore_errors=True)
+            raise
+        return installed_scans
+
+    def _extract_uploaded_skill_to_temp(self, file_bytes: bytes, tmp_dir: str) -> str:
+        """Unpack the uploaded zip under *tmp_dir* and return the directory to scan."""
+        extract_dir = os.path.join(tmp_dir, 'extracted')
+        try:
+            with zipfile.ZipFile(io.BytesIO(file_bytes), 'r') as archive:
+                self._safe_extract_zip(archive, extract_dir)
+        except zipfile.BadZipFile as exc:
+            raise ValueError('Uploaded file must be a valid .zip archive') from exc
+
+        # A lone top-level directory is unwrapped and returned instead.
+        top_level = [os.path.join(extract_dir, name) for name in os.listdir(extract_dir)]
+        return top_level[0] if len(top_level) == 1 and os.path.isdir(top_level[0]) else extract_dir
+
+    @staticmethod
+    def _uploaded_skill_target_stem(filename: str) -> str:
+        """Derive a directory-safe stem from an upload filename (fallback ``uploaded-skill``)."""
+        stem, _extension = os.path.splitext(os.path.basename(str(filename or '').strip()))
+        return ''.join(ch if (ch.isalnum() or ch in '-_') else '-' for ch in stem).strip('-_') or 'uploaded-skill'
+
+    @staticmethod
+    def _preview_source_root(root_path: str, source_path: str) -> str:
+        """Map an archive-relative *source_path* to a directory under *root_path*."""
+        relative = str(source_path or '').strip().replace('\\', '/').strip('/')
+        # An empty source path designates the archive root itself.
+        return os.path.join(root_path, relative) if relative else root_path
+
+    @staticmethod
+    def _resolve_source_subdir_root(root_path: str, source_subdir: str) -> str:
+        """Resolve *source_subdir* inside the archive at *root_path*; ValueError if it escapes or is missing."""
+        normalized = str(source_subdir or '').strip().replace('\\', '/').strip('/')
+        if not normalized:
+            return root_path
+        normalized_path = os.path.normpath(normalized)
+        # Compare the first path component: names such as "..data" are legitimate.
+        if normalized_path.split(os.sep, 1)[0] == '..' or os.path.isabs(normalized_path):
+            raise ValueError('source_subdir must stay within the uploaded archive')
+        target_root = os.path.realpath(os.path.join(root_path, normalized_path))
+        archive_root = os.path.realpath(root_path)
+        if target_root != archive_root and not target_root.startswith(f'{archive_root}{os.sep}'):
+            raise ValueError('source_subdir must stay within the uploaded archive')
+        if not os.path.isdir(target_root):
+            raise ValueError(f'source_subdir does not exist in the uploaded archive: {normalized}')
+        return target_root
+
+    @staticmethod
+    def _safe_extract_zip(archive: zipfile.ZipFile, target_dir: str) -> None:
+        """Extract *archive* into *target_dir* after rejecting members that would escape it."""
+        root = os.path.realpath(target_dir)
+        os.makedirs(root, exist_ok=True)
+        inside_prefix = f'{root}{os.sep}'
+        # Directory entries and empty names are skipped by this validation pass.
+        for name in (info.filename for info in archive.infolist()):
+            if not name or name.endswith('/'):
+                continue
+            flat = posixpath.normpath(name)
+            unsafe = flat == '..' or flat.startswith('../') or os.path.isabs(flat)
+            if not unsafe:
+                resolved = os.path.realpath(os.path.join(root, flat))
+                unsafe = resolved != root and not resolved.startswith(inside_prefix)
+            if unsafe:
+                raise ValueError(f'Archive contains an unsafe path: {name}')
+
+        archive.extractall(root)
+
+    def _resolve_skill_path(self, skill: dict, path: str, *, expect_directory: bool) -> tuple[str, str]:
+        """Resolve *path* inside the skill's package root, following symlinks.
+
+        Returns ``(absolute_path, normalized_relative_path)``. Raises ValueError when
+        the path is absolute, escapes the package, or a required directory is missing.
+        """
+        package_root = self._normalize_package_root(skill.get('package_root', ''))
+        if not package_root:
+            raise ValueError(f'Skill "{skill.get("name", "")}" has no package_root')
+        relative_path = str(path or '.').strip() or '.'
+        if os.path.isabs(relative_path):
+            raise ValueError('path must be relative to the skill package root')
+
+        def _inside(candidate: str) -> bool:
+            return candidate == package_root or candidate.startswith(f'{package_root}{os.sep}')
+
+        normalized_relative = os.path.normpath(relative_path)
+        target_path = os.path.realpath(os.path.join(package_root, normalized_relative))
+        # Only a leading ".." component escapes; names like "..notes" are ordinary files.
+        if normalized_relative.split(os.sep, 1)[0] == '..' or not _inside(target_path):
+            raise ValueError('path must stay within the skill package root')
+        if expect_directory and not os.path.isdir(target_path):
+            raise ValueError(f'Skill directory not found: {relative_path}')
+        if not expect_directory and not _inside(os.path.dirname(target_path) or package_root):
+            raise ValueError('path must stay within the skill package root')
+        return target_path, normalized_relative
+
+    @staticmethod
+    def _find_skill_entry(path: str) -> Optional[tuple[str, str]]:
+        """Return ``(path, entry_name)`` for the first of SKILL.md / skill.md present in *path*."""
+        present = (name for name in ('SKILL.md', 'skill.md') if os.path.isfile(os.path.join(path, name)))
+        entry_name = next(present, None)
+        return None if entry_name is None else (path, entry_name)
+
+    def _discover_skill_directories(self, root_path: str, max_depth: int = 2) -> list[tuple[str, str]]:
+        """Breadth-first search for skill packages at or below *root_path*.
+
+        A directory holding an entry file is reported and not descended into.
+        Directories more than *max_depth* levels below the root are not visited;
+        directories that cannot be scanned are skipped.
+        """
+        results: list[tuple[str, str]] = []
+        visited: set[str] = set()
+        level = [root_path]
+        depth = 0
+        while level:
+            next_level: list[str] = []
+            for raw_path in level:
+                directory = os.path.abspath(raw_path)
+                if directory in visited:
+                    continue
+                visited.add(directory)
+
+                hit = self._find_skill_entry(directory)
+                if hit:
+                    results.append(hit)
+                elif depth < max_depth:
+                    try:
+                        children = sorted(os.scandir(directory), key=lambda child: child.name)
+                    except OSError:
+                        continue
+                    next_level.extend(child.path for child in children if child.is_dir())
+            level, depth = next_level, depth + 1
+        return results
+
+    @staticmethod
+    def _validate_skill_name(name: str) -> str:
+        """Return the stripped name; raise ValueError if empty, malformed or longer than 64."""
+        if not (cleaned := str(name or '').strip()):
+            raise ValueError('Skill name is required')
+        if not cleaned.translate(str.maketrans('', '', '-_')).isalnum():
+            raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores')
+        if len(cleaned) > 64:
+            raise ValueError('Skill name cannot exceed 64 characters')
+        return cleaned
+
+    @staticmethod
+    def _normalize_package_root(package_root: str) -> str:
+        """Return the absolute, symlink-resolved *package_root*; blank input yields ``''``."""
+        stripped = str(package_root).strip()
+        # abspath anchors relative input at the cwd; realpath then resolves symlinks.
+        return os.path.realpath(os.path.abspath(stripped)) if stripped else ''
+
+    @staticmethod
+    def _resolve_create_field(data: dict, field: str, imported_skill_data: dict | None, *, default: str) -> str:
+        """Pick a create-time field: request value, else imported value, else *default*."""
+        def _imported() -> str:
+            return str(imported_skill_data.get(field, default) or default)
+
+        requested = data.get(field)
+        if requested is None:
+            return default if imported_skill_data is None else _imported()
+
+        text = str(requested or '')
+        return _imported() if imported_skill_data is not None and not text.strip() else text
diff --git a/src/langbot_plugin/cli/__init__.py b/src/langbot_plugin/cli/__init__.py
index 7f913f2..dc48f27 100644
--- a/src/langbot_plugin/cli/__init__.py
+++ b/src/langbot_plugin/cli/__init__.py
@@ -33,6 +33,10 @@
         - [--stdio-control -s]: Use stdio for control connection
         - [--ws-control-port]: The port for control connection
         - [--ws-debug-port]: The port for debug connection
+    box: Run the sandbox box runtime
+        - [--host]: Bind address, default is 0.0.0.0
+        - [--stdio-control -s]: Use stdio for control connection
+        - [--ws-control-port]: The port for control connection, default is 5410
 """
 
 
@@ -120,6 +124,24 @@ def main():
         help="Skip checking and installing dependencies for all installed plugins",
     )
 
+    # box command
+    box_parser = subparsers.add_parser("box", help="Run the sandbox box runtime")
+    box_parser.add_argument(
+        "--host", default="0.0.0.0", help="Bind address"
+    )
+    box_parser.add_argument(
+        "-s",
+        "--stdio-control",
+        action="store_true",
+        help="Use stdio for control connection",
+    )
+    box_parser.add_argument(
+        "--ws-control-port",
+        type=int,
+        default=5410,
+        help="The port for control connection",
+    )
+
     args = parser.parse_args()
 
     if not args.command:
@@ -148,6 +170,10 @@ def main():
             publish_process()
         case "rt":
             runtime_app.main(args)
+        case "box":
+            from langbot_plugin.box.server import main as box_main
+
+            box_main(args)
         case _:
             cli_print("unknown_command", args.command)
             sys.exit(1)
diff --git a/src/langbot_plugin/runtime/io/handler.py b/src/langbot_plugin/runtime/io/handler.py
index 1436c94..eecc997 100644
--- a/src/langbot_plugin/runtime/io/handler.py
+++ b/src/langbot_plugin/runtime/io/handler.py
@@ -16,6 +16,7 @@
 import os
 import hashlib
 import base64
+import uuid
 import aiofiles
 import aiofiles.os
 import logging
@@ -70,11 +71,9 @@ def __init__(
         @self.action(CommonAction.FILE_CHUNK)
         async def file_chunk(data: dict[str, Any]) -> ActionResponse:
             file_key = data["file_key"]
-            file_length = data["file_length"]
             chunk_base64 = data["chunk_base64"]
             chunk_index = data["chunk_index"]
             chunk_amount = data["chunk_amount"]
-            chunk_size = data["chunk_size"]
             # append the chunk to the file
             async with aiofiles.open(
                 os.path.join(FILE_STORAGE_DIR, file_key), "ab"
@@ -268,8 +267,10 @@ def decorator(
     # ====== file transfer ======
     async def send_file(self, file_bytes: bytes, file_extension: str) -> str:
         """Send a file to the peer, chunk by chunk, in base64."""
-        hash_value = hashlib.sha256(file_bytes).hexdigest()
-        file_key = f"{hash_value}.{file_extension}"
+        hash_value = hashlib.sha256(file_bytes).hexdigest()[:16]
+        extension = file_extension.strip(".")
+        suffix = f".{extension}" if extension else ""
+        file_key = f"{hash_value}-{uuid.uuid4().hex}{suffix}"
         file_length = len(file_bytes)
         chunk_amount = max(
             1, (file_length + FILE_CHUNK_LENGTH - 1) // FILE_CHUNK_LENGTH
@@ -308,4 +309,7 @@ async def read_local_file(self, file_key: str) -> bytes:
             return await f.read()
 
     async def delete_local_file(self, file_key: str) -> None:
-        await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key))
+        try:
+            await aiofiles.os.remove(os.path.join(FILE_STORAGE_DIR, file_key))
+        except FileNotFoundError:
+            return
diff --git a/src/langbot_plugin/version.py b/src/langbot_plugin/version.py
index d7b30e1..3bf78d8 100644
--- a/src/langbot_plugin/version.py
+++ b/src/langbot_plugin/version.py
@@ -1 +1 @@
-__version__ = "0.3.6"
+__version__ = "0.4.0-beta.1"
diff --git a/tests/box/__init__.py b/tests/box/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/box/test_backend_selection.py b/tests/box/test_backend_selection.py
new file mode 100644
index 0000000..2439394
--- /dev/null
+++ b/tests/box/test_backend_selection.py
@@ -0,0 +1,275 @@
+"""Unit tests for BoxRuntime backend selection mechanism."""
+
+from __future__ import annotations
+
+import logging
+import datetime as dt
+from unittest import mock
+
+import pytest
+
+from langbot_plugin.box.backend import BaseSandboxBackend
+from langbot_plugin.box.models import BoxSessionInfo, BoxSpec
+from langbot_plugin.box.runtime import BoxRuntime
+
+
+@pytest.fixture
+def logger():
+    return logging.getLogger('test.runtime')
+
+
+class MockBackend(BaseSandboxBackend):
+    """Mock backend for testing."""
+
+    def __init__(self, logger: logging.Logger, name: str, available: bool = True):
+        super().__init__(logger)
+        self.name = name
+        self._available = available
+        self._alive = True
+        self.started_sessions = 0
+        self.stopped_sessions = 0
+
+    async def is_available(self) -> bool:
+        return self._available
+
+    async def start_session(self, spec):
+        self.started_sessions += 1
+        now = dt.datetime.now(dt.timezone.utc)
+        return BoxSessionInfo(
+            session_id=spec.session_id,
+            backend_name=self.name,
+            backend_session_id=f'{self.name}-{self.started_sessions}',
+            image=spec.image,
+            network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
+            mount_path=spec.mount_path,
+            persistent=spec.persistent,
+            cpus=spec.cpus,
+            memory_mb=spec.memory_mb,
+            pids_limit=spec.pids_limit,
+            read_only_rootfs=spec.read_only_rootfs,
+            workspace_quota_mb=spec.workspace_quota_mb,
+            created_at=now,
+            last_used_at=now,
+        )
+
+    async def exec(self, session, spec):
+        pass
+
+    async def stop_session(self, session):
+        self.stopped_sessions += 1
+
+    async def is_session_alive(self, session) -> bool:
+        return self._alive
+
+
+# ── E2B backend creation ────────────────────────────────────────────────
+
+def test_e2b_backend_created_if_package_installed(logger):
+    """E2B backend is created when package is installed."""
+    with mock.patch('os.getenv', return_value=''):
+        runtime = BoxRuntime(logger)
+        # E2B backend exists (package installed)
+        e2b_backend = runtime.backends[2]
+        assert e2b_backend is not None
+        assert e2b_backend.name == 'e2b'
+
+
+def test_e2b_backend_none_if_package_not_installed(logger):
+    """E2B backend is None when package is not installed."""
+    with (
+        mock.patch('os.getenv', return_value=''),
+        mock.patch.object(BoxRuntime, '_create_e2b_backend', return_value=None),
+    ):
+        runtime = BoxRuntime(logger)
+        # Third backend is None (package not installed)
+        assert runtime.backends[2] is None
+        # Filtered list for selection
+        active_backends = [b for b in runtime.backends if b is not None]
+        assert len(active_backends) == 2
+
+
+def test_e2b_import_failure_returns_none(logger):
+    """Default construction yields at least two non-None backends (no import failure is forced here)."""
+    with mock.patch('os.getenv', return_value=''):
+        # NOTE(review): presumably _create_e2b_backend handles ImportError internally; not exercised by this test
+        runtime = BoxRuntime(logger)
+        # Should have Docker, nsjail, and E2B (if package installed) or None
+        active_backends = [b for b in runtime.backends if b is not None]
+        assert len(active_backends) >= 2
+
+
+# ── box.backend configuration ──────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_box_backend_config_forces_specific_backend(logger):
+    """box.backend config forces selection of named backend."""
+    backend_e2b = MockBackend(logger, 'e2b', available=True)
+    backend_docker = MockBackend(logger, 'docker', available=True)
+    backend_nsjail = MockBackend(logger, 'nsjail', available=False)
+
+    runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail])
+    runtime.init({'backend': 'docker'})
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected.name == 'docker'
+    assert selected is backend_docker
+
+
+@pytest.mark.anyio
+async def test_box_backend_config_unavailable_returns_none(logger):
+    """When box.backend specifies unavailable backend, returns None."""
+    backend_e2b = MockBackend(logger, 'e2b', available=False)
+    backend_docker = MockBackend(logger, 'docker', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker])
+    runtime.init({'backend': 'e2b'})
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected is None
+
+
+@pytest.mark.anyio
+async def test_box_backend_config_not_found_returns_none(logger):
+    """When box.backend specifies unknown backend name, returns None."""
+    backend_docker = MockBackend(logger, 'docker', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_docker])
+    runtime.init({'backend': 'unknown'})
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected is None
+
+
+@pytest.mark.anyio
+async def test_box_backend_config_no_fallback(logger):
+    """When box.backend is set but backend unavailable, does NOT fallback."""
+    backend_e2b = MockBackend(logger, 'e2b', available=False)
+    backend_docker = MockBackend(logger, 'docker', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker])
+    runtime.init({'backend': 'e2b'})
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    # Should return None, not fallback to docker
+    assert selected is None
+
+
+@pytest.mark.anyio
+async def test_box_backend_env_var_is_ignored(logger):
+    """BOX_BACKEND is not an independent override; use box.backend instead."""
+    backend_docker = MockBackend(logger, 'docker', available=True)
+    backend_e2b = MockBackend(logger, 'e2b', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b])
+    runtime.init({'backend': 'docker'})
+
+    # Mirror os.getenv(key, default=None) so callers passing a default do not hit a TypeError.
+    with mock.patch('os.getenv', side_effect=lambda key, default=None: 'e2b' if key == 'BOX_BACKEND' else default):
+        selected = await runtime._select_backend()
+    assert selected is backend_docker
+
+
+# ── Auto-detect backend selection ───────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_auto_detect_first_available(logger):
+    """Without box.backend, selects first available backend."""
+    backend_e2b = MockBackend(logger, 'e2b', available=False)
+    backend_docker = MockBackend(logger, 'docker', available=True)
+    backend_nsjail = MockBackend(logger, 'nsjail', available=False)
+
+    runtime = BoxRuntime(logger, backends=[backend_e2b, backend_docker, backend_nsjail])
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected.name == 'docker'
+
+
+@pytest.mark.anyio
+async def test_auto_detect_none_when_all_unavailable(logger):
+    """Returns None when all backends are unavailable."""
+    backend_docker = MockBackend(logger, 'docker', available=False)
+    backend_nsjail = MockBackend(logger, 'nsjail', available=False)
+
+    runtime = BoxRuntime(logger, backends=[backend_docker, backend_nsjail])
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected is None
+
+
+@pytest.mark.anyio
+async def test_init_config_reselects_backend_before_sessions(logger):
+    """INIT config from LangBot can change the selected backend."""
+    backend_docker = MockBackend(logger, 'docker', available=True)
+    backend_e2b = MockBackend(logger, 'e2b', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_docker, backend_e2b])
+
+    with mock.patch('os.getenv', return_value=None):
+        await runtime.initialize()
+        assert runtime._backend is backend_docker
+
+        runtime.init({'backend': 'e2b'})
+        assert runtime._backend is None
+
+        selected = await runtime._get_backend()
+
+    assert selected is backend_e2b
+
+
+@pytest.mark.anyio
+async def test_create_session_recreates_disappeared_backend_session(logger):
+    """A stale in-memory session is dropped if its backend session vanished."""
+    backend = MockBackend(logger, 'docker', available=True)
+    runtime = BoxRuntime(logger, backends=[backend])
+    spec = BoxSpec(session_id='mcp-shared', cmd='true', persistent=True, read_only_rootfs=False)
+
+    with mock.patch('os.getenv', return_value=None):
+        first = await runtime.create_session(spec)
+        backend._alive = False
+        second = await runtime.create_session(spec)
+
+    assert first['backend_session_id'] == 'docker-1'
+    assert second['backend_session_id'] == 'docker-2'
+    assert backend.started_sessions == 2
+    assert backend.stopped_sessions == 1
+
+
+# ── Custom backends list ────────────────────────────────────────────────
+
+def test_custom_backends_list_preserved(logger):
+    """Providing custom backends list overrides auto-detection."""
+    custom_backend = MockBackend(logger, 'custom', available=True)
+
+    runtime = BoxRuntime(logger, backends=[custom_backend])
+
+    assert len(runtime.backends) == 1
+    assert runtime.backends[0].name == 'custom'
+
+
+@pytest.mark.anyio
+async def test_custom_backends_with_box_backend_config(logger):
+    """box.backend works with custom backends list."""
+    backend_a = MockBackend(logger, 'a', available=True)
+    backend_b = MockBackend(logger, 'b', available=True)
+
+    runtime = BoxRuntime(logger, backends=[backend_a, backend_b])
+    runtime.init({'backend': 'b'})
+
+    with mock.patch('os.getenv', return_value=None):
+        selected = await runtime._select_backend()
+
+    assert selected.name == 'b'
diff --git a/tests/box/test_e2b_backend.py b/tests/box/test_e2b_backend.py
new file mode 100644
index 0000000..0252c35
--- /dev/null
+++ b/tests/box/test_e2b_backend.py
@@ -0,0 +1,482 @@
+"""Unit tests for E2BSandboxBackend.
+
+These tests do NOT require e2b package to be installed – they mock the E2B SDK
+to verify parameter mapping, session lifecycle, and availability detection.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from unittest import mock
+
+import pytest
+
+from langbot_plugin.box.e2b_backend import (
+    E2BSandboxBackend,
+    _adapt_path_for_e2b,
+    _check_e2b_available,
+)
+from langbot_plugin.box.models import (
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
+
+
+@pytest.fixture
+def logger():
+    return logging.getLogger('test.e2b')
+
+
+@pytest.fixture
+def backend(logger):
+    b = E2BSandboxBackend(logger=logger)
+    b.instance_id = 'test123'
+    return b
+
+
+@pytest.fixture
+def mock_e2b_module():
+    """Mock the e2b module for tests."""
+    mock_async_sandbox = mock.MagicMock()
+    mock_async_sandbox.sandbox_id = 'sandbox-test-123'
+
+    # Mock AsyncSandbox.create
+    mock_async_sandbox.create = mock.AsyncMock(return_value=mock_async_sandbox)
+
+    # Mock AsyncSandbox.connect
+    mock_async_sandbox.connect = mock.AsyncMock(return_value=mock_async_sandbox)
+
+    # Mock AsyncSandbox.kill
+    mock_async_sandbox.kill = mock.AsyncMock(return_value=True)
+
+    # Mock commands.run result
+    mock_command_result = mock.MagicMock()
+    mock_command_result.stdout = 'output'
+    mock_command_result.stderr = ''
+    mock_command_result.exit_code = 0
+
+    mock_commands = mock.MagicMock()
+    mock_commands.run = mock.AsyncMock(return_value=mock_command_result)
+    mock_async_sandbox.commands = mock_commands
+
+    # Mock the module import
+    with (
+        mock.patch('langbot_plugin.box.e2b_backend._e2b_available', None),
+        mock.patch('langbot_plugin.box.e2b_backend._AsyncSandbox', None),
+        mock.patch('langbot_plugin.box.e2b_backend._CommandResult', None),
+    ):
+        # Simulate successful import
+        import langbot_plugin.box.e2b_backend as e2b_backend
+        e2b_backend._e2b_available = True
+        e2b_backend._AsyncSandbox = mock_async_sandbox
+        yield mock_async_sandbox
+
+
+# ── Path adaptation ────────────────────────────────────────────────────
+
+def test_adapt_path_workspace():
+    """_adapt_path_for_e2b maps /workspace to /home/user/workspace."""
+    assert _adapt_path_for_e2b('/workspace') == '/home/user/workspace'
+    assert _adapt_path_for_e2b('/workspace/subdir') == '/home/user/workspace/subdir'
+
+
+def test_adapt_path_other_paths_unchanged():
+    """_adapt_path_for_e2b doesn't modify paths not starting with /workspace."""
+    assert _adapt_path_for_e2b('/home/user') == '/home/user'
+    assert _adapt_path_for_e2b('/tmp') == '/tmp'
+    assert _adapt_path_for_e2b('/code') == '/code'
+
+
+# ── is_available ──────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_is_available_no_package(backend):
+    """is_available returns False when e2b package is not installed."""
+    with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=False):
+        assert await backend.is_available() is False
+
+
+@pytest.mark.anyio
+async def test_is_available_no_api_key(backend):
+    """is_available returns False when E2B_API_KEY is not set."""
+    backend._api_key = None
+    with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True):
+        assert await backend.is_available() is False
+
+
+@pytest.mark.anyio
+async def test_is_available_with_api_key(backend):
+    """is_available returns True when both package and API key are available."""
+    backend._api_key = 'test-api-key'
+    with mock.patch('langbot_plugin.box.e2b_backend._check_e2b_available', return_value=True):
+        assert await backend.is_available() is True
+
+
+@pytest.mark.anyio
+async def test_configure_from_langbot(backend, mock_e2b_module):
+    """configure() applies settings from LangBot config.yaml."""
+    # Env vars win over config, so clear them to keep shell/CI E2B_* values out.
+    with mock.patch.dict('os.environ', clear=True):
+        backend.configure({
+            'api_key': 'config-api-key',
+            'api_url': 'http://127.0.0.1:3000',
+            'template': 'python-3.11',
+        })
+        await backend.initialize()
+    assert backend._api_key == 'config-api-key'
+    assert backend._api_url == 'http://127.0.0.1:3000'
+    assert backend._template == 'python-3.11'
+
+
+@pytest.mark.anyio
+async def test_env_vars_override_config(backend, mock_e2b_module):
+    """Environment variables take precedence over config.yaml values."""
+    with mock.patch.dict('os.environ', {'E2B_API_KEY': 'env-api-key', 'E2B_API_URL': 'http://env-url'}):
+        backend.configure({
+            'api_key': 'config-api-key',
+            'api_url': 'http://config-url',
+        })
+        await backend.initialize()
+
+        # Environment variables should win
+        assert backend._api_key == 'env-api-key'
+        assert backend._api_url == 'http://env-url'
+
+
+# ── start_session ─────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_start_session_basic(backend, mock_e2b_module):
+    """start_session creates sandbox with default parameters."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(session_id='sess1', cmd='echo hi')
+
+    info = await backend.start_session(spec)
+
+    assert info.backend_name == 'e2b'
+    assert info.session_id == 'sess1'
+    assert info.backend_session_id == 'sandbox-test-123'
+    # Session metadata keeps LangBot's logical mount path so later specs
+    # with /workspace can reuse the same session.
+    assert info.mount_path == '/workspace'
+
+    # Verify AsyncSandbox.create was called with api_key
+    mock_e2b_module.create.assert_called_once()
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert call_kwargs.get('api_key') == 'test-api-key'
+
+
+@pytest.mark.anyio
+async def test_start_session_with_template(backend, mock_e2b_module):
+    """start_session passes template parameter when image is specified."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess2',
+        cmd='python script.py',
+        image='python-3.11',
+    )
+
+    info = await backend.start_session(spec)
+
+    assert info.image == 'python-3.11'
+
+    # Verify template was passed
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert call_kwargs.get('template') == 'python-3.11'
+
+
+@pytest.mark.anyio
+async def test_start_session_with_envs(backend, mock_e2b_module):
+    """start_session passes environment variables."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess3',
+        cmd='echo $FOO',
+        env={'FOO': 'bar', 'DEBUG': '1'},
+    )
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert call_kwargs.get('envs') == {'FOO': 'bar', 'DEBUG': '1'}
+
+
+@pytest.mark.anyio
+async def test_start_session_with_api_url(backend, mock_e2b_module):
+    """start_session passes domain for CubeSandbox self-deployment."""
+    backend._api_key = 'dummy'
+    backend._api_url = 'http://127.0.0.1:3000'
+    spec = BoxSpec(session_id='sess4', cmd='ls')
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert call_kwargs.get('domain') == 'http://127.0.0.1:3000'
+
+
+@pytest.mark.anyio
+async def test_start_session_custom_mount_path(backend, mock_e2b_module):
+    """start_session adapts custom mount_path."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess5',
+        cmd='ls',
+        mount_path='/workspace/myproject',
+    )
+
+    info = await backend.start_session(spec)
+
+    # Session metadata keeps the logical mount path; command execution adapts
+    # it to E2B's internal writable path.
+    assert info.mount_path == '/workspace/myproject'
+
+
+# ── CubeSandbox host-mount metadata ───────────────────────────────────
+
+@pytest.mark.anyio
+async def test_start_session_host_mount_rw(backend, mock_e2b_module):
+    """host_path with rw mode generates correct metadata."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess-hp-rw',
+        cmd='ls',
+        host_path='/data/project',
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        mount_path='/workspace',
+    )
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    metadata = call_kwargs.get('metadata', {})
+
+    assert 'host-mount' in metadata
+    host_mount = json.loads(metadata['host-mount'])
+    assert len(host_mount) == 1
+    assert host_mount[0]['hostPath'] == '/data/project'
+    # mountPath should be adapted
+    assert host_mount[0]['mountPath'] == '/home/user/workspace'
+    assert host_mount[0]['readOnly'] is False
+
+
+@pytest.mark.anyio
+async def test_start_session_host_mount_ro(backend, mock_e2b_module):
+    """host_path with ro mode generates readOnly=True in metadata."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess-hp-ro',
+        cmd='cat file.txt',
+        host_path='/data/source',
+        host_path_mode=BoxHostMountMode.READ_ONLY,
+        mount_path='/src',  # Non-workspace path stays unchanged
+    )
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    metadata = call_kwargs.get('metadata', {})
+
+    host_mount = json.loads(metadata['host-mount'])
+    assert host_mount[0]['readOnly'] is True
+    # Non-workspace path stays unchanged
+    assert host_mount[0]['mountPath'] == '/src'
+
+
+@pytest.mark.anyio
+async def test_start_session_no_host_mount_when_none(backend, mock_e2b_module):
+    """host_path_mode=none skips host-mount metadata."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(
+        session_id='sess-hp-none',
+        cmd='ls',
+        host_path='/data',
+        host_path_mode=BoxHostMountMode.NONE,
+    )
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert 'host-mount' not in call_kwargs.get('metadata', {})
+
+
+@pytest.mark.anyio
+async def test_start_session_no_host_mount_when_empty(backend, mock_e2b_module):
+    """Empty host_path skips host-mount metadata."""
+    backend._api_key = 'test-api-key'
+    spec = BoxSpec(session_id='sess-no-hp', cmd='ls')
+
+    await backend.start_session(spec)  # return value unused; only the create() kwargs matter
+
+    call_kwargs = mock_e2b_module.create.call_args.kwargs
+    assert 'metadata' not in call_kwargs or 'host-mount' not in call_kwargs.get('metadata', {})
+
+
+# ── exec ──────────────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_exec_success(backend, mock_e2b_module):
+    """exec runs command and returns result."""
+    backend._api_key = 'test-api-key'
+
+    session = BoxSessionInfo(
+        session_id='exec-sess',
+        backend_name='e2b',
+        backend_session_id='sandbox-123',
+        image='base',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+    spec = BoxSpec(session_id='exec-sess', cmd='echo hello', workdir='/workspace', env={'FOO': 'bar'})
+
+    result = await backend.exec(session, spec)
+
+    assert result.status == BoxExecutionStatus.COMPLETED
+    assert result.exit_code == 0
+    assert result.stdout == 'output'
+
+    # Verify connect and run were called
+    mock_e2b_module.connect.assert_called_once()
+    mock_e2b_module.commands.run.assert_called_once()
+
+    # Verify command includes path adaptation
+    run_kwargs = mock_e2b_module.commands.run.call_args.kwargs
+    assert '/home/user/workspace' in run_kwargs['cmd']
+
+
+@pytest.mark.anyio
+async def test_exec_timeout(backend, mock_e2b_module):
+    """exec handles timeout correctly."""
+    backend._api_key = 'test-api-key'
+
+    # Mock timeout error
+    mock_e2b_module.commands.run = mock.AsyncMock(
+        side_effect=Exception('Command timed out after 30 seconds')
+    )
+
+    session = BoxSessionInfo(
+        session_id='timeout-sess',
+        backend_name='e2b',
+        backend_session_id='sandbox-456',
+        image='base',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+    spec = BoxSpec(session_id='timeout-sess', cmd='sleep 100', timeout_sec=30)
+
+    result = await backend.exec(session, spec)
+
+    assert result.status == BoxExecutionStatus.TIMED_OUT
+    assert result.exit_code is None
+    assert 'timed out' in result.stderr.lower()
+
+
+@pytest.mark.anyio
+async def test_exec_truncates_large_output(backend, mock_e2b_module):
+    """exec truncates output exceeding the limit."""
+    backend._api_key = 'test-api-key'
+
+    # Create large output (over 1MB)
+    large_output = 'x' * (2 * 1024 * 1024)  # 2MB
+    mock_command_result = mock.MagicMock()
+    mock_command_result.stdout = large_output
+    mock_command_result.stderr = ''
+    mock_command_result.exit_code = 0
+
+    mock_commands = mock.MagicMock()
+    mock_commands.run = mock.AsyncMock(return_value=mock_command_result)
+    mock_e2b_module.commands = mock_commands
+
+    session = BoxSessionInfo(
+        session_id='truncate-sess',
+        backend_name='e2b',
+        backend_session_id='sandbox-789',
+        image='base',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+    spec = BoxSpec(session_id='truncate-sess', cmd='cat large_file')
+
+    result = await backend.exec(session, spec)
+
+    assert 'clipped' in result.stdout
+
+
+# ── stop_session ──────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_stop_session(backend, mock_e2b_module):
+    """stop_session kills the sandbox."""
+    backend._api_key = 'test-api-key'
+
+    session = BoxSessionInfo(
+        session_id='stop-sess',
+        backend_name='e2b',
+        backend_session_id='sandbox-to-kill',
+        image='base',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    await backend.stop_session(session)
+
+    # Verify AsyncSandbox.kill was called
+    mock_e2b_module.kill.assert_called_once()
+
+
+@pytest.mark.anyio
+async def test_stop_session_handles_error(backend, mock_e2b_module):
+    """stop_session logs error but doesn't raise on kill failure."""
+    backend._api_key = 'test-api-key'
+
+    mock_e2b_module.kill = mock.AsyncMock(side_effect=Exception('Sandbox not found'))
+
+    session = BoxSessionInfo(
+        session_id='stop-fail',
+        backend_name='e2b',
+        backend_session_id='sandbox-missing',
+        image='base',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    # Should not raise
+    await backend.stop_session(session)
+
+
+# ── _check_e2b_available ──────────────────────────────────────────────
+
+def test_check_e2b_available_caches_result():
+    """_check_e2b_available caches the import check result."""
+    import langbot_plugin.box.e2b_backend as e2b_backend
+
+    # Reset the cache via patch.object so the module globals are restored afterwards.
+    with (
+        mock.patch.object(e2b_backend, '_e2b_available', None),
+        mock.patch.object(e2b_backend, '_AsyncSandbox', None),
+    ):
+        with mock.patch.dict('sys.modules', {'e2b': mock.MagicMock()}):
+            result1 = _check_e2b_available()  # first call populates the cache
+        # Second call should use cached result
+        result2 = _check_e2b_available()
+    assert result1 == result2
+
+
+def test_check_e2b_available_returns_false_on_import_error():
+    """_check_e2b_available returns False when import fails."""
+    import langbot_plugin.box.e2b_backend as e2b_backend
+    with (
+        mock.patch.object(e2b_backend, '_e2b_available', None),  # reset cache; restored on exit
+        mock.patch.object(e2b_backend, '_AsyncSandbox', None),  # restored on exit as well
+        mock.patch('builtins.__import__', side_effect=ImportError('No e2b')),
+    ):
+        result = _check_e2b_available()
+    assert result is False
diff --git a/tests/box/test_nsjail_backend.py b/tests/box/test_nsjail_backend.py
new file mode 100644
index 0000000..2a45b19
--- /dev/null
+++ b/tests/box/test_nsjail_backend.py
@@ -0,0 +1,452 @@
+"""Unit tests for NsjailBackend.
+
+These tests do NOT require nsjail to be installed – they mock subprocess
+calls and filesystem checks to verify argument construction, session
+directory management, and cgroup detection logic.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import pathlib
+from unittest import mock
+
+import pytest
+
+from langbot_plugin.box.nsjail_backend import (
+    NsjailBackend,
+    _READONLY_ETC_ENTRIES,
+    _READONLY_SYSTEM_MOUNTS,
+)
+from langbot_plugin.box.models import (
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxMountSpec,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
+
+
+@pytest.fixture
+def logger() -> logging.Logger:  # logger handed to NsjailBackend by the `backend` fixture
+    return logging.getLogger('test.nsjail')
+
+
+@pytest.fixture
+def tmp_base(tmp_path: pathlib.Path) -> pathlib.Path:
+    return tmp_path / 'nsjail-base'  # path only; this fixture does not create the directory
+
+
+@pytest.fixture
+def backend(logger, tmp_base) -> NsjailBackend:
+    b = NsjailBackend(logger=logger, base_dir=str(tmp_base))
+    b.instance_id = 'test123'  # fixed id; the orphan-cleanup test passes the same value
+    return b
+
+
+# ── is_available ──────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_is_available_no_binary(backend):
+    with mock.patch('shutil.which', return_value=None):  # simulate: no nsjail found on PATH
+        assert await backend.is_available() is False
+
+
+@pytest.mark.anyio
+async def test_is_available_binary_exists(backend, tmp_base):
+    """is_available is True and base_dir exists once the mocked subprocess exits 0."""
+    probe = mock.AsyncMock()
+    probe.returncode = 0
+    probe.wait = mock.AsyncMock(return_value=0)
+    with (
+        mock.patch('shutil.which', return_value='/usr/bin/nsjail'),
+        mock.patch('asyncio.create_subprocess_exec', return_value=probe),
+    ):
+        available = await backend.is_available()
+
+    assert available is True
+    assert tmp_base.exists()
+
+
+# ── start_session ─────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_start_session_creates_directories(backend, tmp_base):
+    """start_session lays out the session directory and reports its metadata."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    box_spec = BoxSpec(session_id='sess1', cmd='echo hi')
+
+    session = await backend.start_session(box_spec)
+
+    # backend_session_id is used here as the session directory path.
+    layout = pathlib.Path(session.backend_session_id)
+    assert layout.exists()
+    for sub in ('root', 'workspace', 'tmp', 'home'):
+        assert (layout / sub).is_dir()
+    assert (layout / 'meta.json').exists()
+
+    assert session.backend_name == 'nsjail'
+    assert session.session_id == 'sess1'
+    assert session.image == box_spec.image
+    assert session.read_only_rootfs is True
+
+
+@pytest.mark.anyio
+async def test_start_session_with_host_path(backend, tmp_base):
+    """Host-mount settings on the spec are echoed in the returned session info."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    mount = dict(
+        host_path='/some/path',
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        mount_path='/project',
+    )
+
+    session = await backend.start_session(BoxSpec(session_id='sess2', cmd='ls', **mount))
+
+    assert session.host_path == mount['host_path']
+    assert session.host_path_mode == mount['host_path_mode']
+    assert session.mount_path == mount['mount_path']
+
+
+# ── stop_session ──────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_stop_session_removes_directory(backend, tmp_base):
+    """stop_session deletes the directory that start_session created."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+
+    session = await backend.start_session(BoxSpec(session_id='sess-rm', cmd='echo'))
+    created = pathlib.Path(session.backend_session_id)
+    assert created.exists()
+
+    await backend.stop_session(session)
+    assert not created.exists()
+
+
+# ── nsjail argument construction ──────────────────────────────────────
+
+def test_build_nsjail_args_basic(backend, tmp_base):
+    """Default spec yields chroot, workspace bind, env and /bin/sh arguments."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    session_dir = tmp_base / 'test_session'
+    for name in ('root', 'workspace', 'tmp', 'home'):
+        (session_dir / name).mkdir(parents=True)
+
+    spec = BoxSpec(session_id='s1', cmd='echo hello', env={'FOO': 'bar'})
+    session = BoxSessionInfo(
+        session_id='s1',
+        backend_name='nsjail',
+        backend_session_id=str(session_dir),
+        image=spec.image,
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    args = backend._build_nsjail_args(session, spec, session_dir)
+
+    def values_of(flag):
+        # Every argument that immediately follows an occurrence of *flag*.
+        return [args[i + 1] for i, a in enumerate(args) if a == flag]
+
+    assert args[0] == 'nsjail'
+    assert '--mode' in args
+    assert args[args.index('--mode') + 1] == 'o'
+    assert '--chroot' in args
+    assert args[args.index('--chroot') + 1] == str(session_dir / 'root')
+    # None of these namespace toggles is passed for the default spec.
+    for flag in ('--clone_newnet', '--clone_newuser', '--clone_newns', '--disable_clone_newnet'):
+        assert flag not in args
+    assert '--really_quiet' in args
+
+    # Writable mounts should reference session directories.
+    assert f'{session_dir}/workspace:/workspace' in values_of('--bindmount')
+
+    # Custom env should be present.
+    assert 'FOO=bar' in values_of('--env')
+
+    # Command is the last part after '--'.
+    assert args[args.index('--') + 1] == '/bin/sh'
+
+    # Mount target directories are created under the per-session chroot root.
+    chroot = session_dir / 'root'
+    for target in ('workspace', 'tmp', 'home'):
+        assert (chroot / target).is_dir()
+
+
+def test_build_nsjail_args_network_on(backend, tmp_base):
+    """With network ON the args carry --disable_clone_newnet, never --clone_newnet."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    session_dir = tmp_base / 'test_session_net'
+    for name in ('root', 'workspace', 'tmp', 'home'):
+        (session_dir / name).mkdir(parents=True)
+
+    spec = BoxSpec(session_id='s2', cmd='curl http://example.com', network=BoxNetworkMode.ON)
+    session = BoxSessionInfo(
+        session_id='s2',
+        backend_name='nsjail',
+        backend_session_id=str(session_dir),
+        image='host',
+        network=BoxNetworkMode.ON,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    built = backend._build_nsjail_args(session, spec, session_dir)
+    assert '--disable_clone_newnet' in built
+    assert '--clone_newnet' not in built
+
+
+def test_build_nsjail_args_host_path_ro(backend, tmp_base):
+    """A READ_ONLY host_path shows up as a --bindmount_ro onto /workspace."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    session_dir = tmp_base / 'test_hp'
+    for name in ('root', 'workspace', 'tmp', 'home'):
+        (session_dir / name).mkdir(parents=True)
+
+    spec = BoxSpec(
+        session_id='s3',
+        cmd='ls',
+        host_path='/data/project',
+        host_path_mode=BoxHostMountMode.READ_ONLY,
+    )
+    session = BoxSessionInfo(
+        session_id='s3',
+        backend_name='nsjail',
+        backend_session_id=str(session_dir),
+        image='host',
+        network=BoxNetworkMode.OFF,
+        host_path='/data/project',
+        host_path_mode=BoxHostMountMode.READ_ONLY,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    built = backend._build_nsjail_args(session, spec, session_dir)
+    read_only = [built[pos + 1] for pos, arg in enumerate(built) if arg == '--bindmount_ro']
+    assert '/data/project:/workspace' in read_only
+
+
+def test_build_nsjail_args_uses_custom_mount_path(backend, tmp_base):
+    """A custom mount_path becomes the bind target and its dir is made in the chroot."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    session_dir = tmp_base / 'test_custom_mount'
+    for name in ('root', 'workspace', 'tmp', 'home'):
+        (session_dir / name).mkdir(parents=True)
+
+    spec = BoxSpec(
+        session_id='s4',
+        cmd='pwd',
+        workdir='/project/src',
+        host_path='/data/project',
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        mount_path='/project',
+    )
+    session = BoxSessionInfo(
+        session_id='s4',
+        backend_name='nsjail',
+        backend_session_id=str(session_dir),
+        image='host',
+        network=BoxNetworkMode.OFF,
+        host_path='/data/project',
+        host_path_mode=BoxHostMountMode.READ_WRITE,
+        mount_path='/project',
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    built = backend._build_nsjail_args(session, spec, session_dir)
+    writable = [built[pos + 1] for pos, arg in enumerate(built) if arg == '--bindmount']
+    assert '/data/project:/project' in writable
+    assert built[built.index('--cwd') + 1] == '/project/src'
+    assert (session_dir / 'root' / 'project').is_dir()
+
+
+def test_build_nsjail_args_extra_mounts_prepare_targets(backend, tmp_base):
+    """An extra mount is bind-mounted and its nested target dir is made in the chroot."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    session_dir = tmp_base / 'test_extra_mount'
+    for name in ('root', 'workspace', 'tmp', 'home'):
+        (session_dir / name).mkdir(parents=True)
+
+    skill_mount = BoxMountSpec(
+        host_path='/data/skills/demo',
+        mount_path='/workspace/.skills/demo',
+        mode=BoxHostMountMode.READ_WRITE,
+    )
+    spec = BoxSpec(
+        session_id='s5',
+        cmd='ls /workspace/.skills/demo',
+        extra_mounts=[skill_mount],
+    )
+    session = BoxSessionInfo(
+        session_id='s5',
+        backend_name='nsjail',
+        backend_session_id=str(session_dir),
+        image='host',
+        network=BoxNetworkMode.OFF,
+        created_at='2024-01-01T00:00:00+00:00',
+        last_used_at='2024-01-01T00:00:00+00:00',
+    )
+
+    built = backend._build_nsjail_args(session, spec, session_dir)
+
+    writable = [built[pos + 1] for pos, arg in enumerate(built) if arg == '--bindmount']
+    assert '/data/skills/demo:/workspace/.skills/demo' in writable
+    assert (session_dir / 'root' / 'workspace' / '.skills' / 'demo').is_dir()
+
+
+def test_build_resource_limits_cgroup(backend):
+    """With cgroup v2 flagged available, limits are expected as --cgroup_* flags."""
+    backend._cgroup_v2_available = True
+    spec = BoxSpec(session_id='s', cmd='x', cpus=2.0, memory_mb=1024, pids_limit=256)
+
+    limits = backend._build_resource_limits(spec)
+
+    def value_of(flag):
+        return limits[limits.index(flag) + 1]
+
+    assert '--cgroup_mem_max' in limits
+    assert value_of('--cgroup_mem_max') == str(1024 * 1024 * 1024)  # 1024 MB in bytes
+    assert value_of('--cgroup_pids_max') == '256'
+    # cpus=2.0 is expected as 2000 for the per-second millisecond budget flag.
+    assert value_of('--cgroup_cpu_ms_per_sec') == '2000'
+
+
+def test_build_resource_limits_rlimit_fallback(backend):
+    """Without cgroup v2, limits are expected as --rlimit_* and no --cgroup_mem_max."""
+    backend._cgroup_v2_available = False
+    spec = BoxSpec(session_id='s', cmd='x', memory_mb=512, pids_limit=128)
+
+    limits = backend._build_resource_limits(spec)
+
+    def value_of(flag):
+        return limits[limits.index(flag) + 1]
+
+    assert '--rlimit_as' in limits
+    assert value_of('--rlimit_as') == '512'
+    assert value_of('--rlimit_nproc') == '128'
+    # cgroup flags should NOT be present.
+    assert '--cgroup_mem_max' not in limits
+
+
+# ── exec ──────────────────────────────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_exec_success(backend, tmp_base):
+    """A zero return code from _run_nsjail is reported as a COMPLETED result."""
+    from langbot_plugin.box.backend import _CommandResult
+
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    spec = BoxSpec(session_id='exec1', cmd='echo hello')
+    session = await backend.start_session(spec)
+    canned = _CommandResult(return_code=0, stdout='hello\n', stderr='', timed_out=False)
+
+    # Stub the sandbox run so the test does not need an nsjail binary.
+    with mock.patch.object(backend, '_run_nsjail', return_value=canned):
+        result = await backend.exec(session, spec)
+
+    assert result.status == BoxExecutionStatus.COMPLETED
+    assert result.exit_code == 0
+    assert result.stdout == 'hello\n'
+    assert result.backend_name == 'nsjail'
+
+
+@pytest.mark.anyio
+async def test_exec_timeout(backend, tmp_base):
+    """A timed-out _run_nsjail result is reported as TIMED_OUT with no exit code."""
+    from langbot_plugin.box.backend import _CommandResult
+
+    tmp_base.mkdir(parents=True, exist_ok=True)
+    spec = BoxSpec(session_id='exec2', cmd='sleep 100', timeout_sec=1)
+    session = await backend.start_session(spec)
+    canned = _CommandResult(return_code=-1, stdout='', stderr='', timed_out=True)
+
+    # Stub the sandbox run; nothing actually sleeps.
+    with mock.patch.object(backend, '_run_nsjail', return_value=canned):
+        result = await backend.exec(session, spec)
+
+    assert result.status == BoxExecutionStatus.TIMED_OUT
+    assert result.exit_code is None
+
+
+# ── cgroup detection ──────────────────────────────────────────────────
+
+def test_detect_cgroup_v2_no_mount():
+    with mock.patch.object(pathlib.Path, 'exists', return_value=False):  # every Path.exists() -> False
+        assert NsjailBackend._detect_cgroup_v2() is False
+
+
+def test_detect_cgroup_v2_root_user():
+    """uid 0 with every Path.exists() probe succeeding is detected as cgroup v2."""
+
+    def always_exists(self):
+        return True
+
+    with (
+        mock.patch('os.getuid', return_value=0),
+        mock.patch.object(pathlib.Path, 'exists', always_exists),
+    ):
+        assert NsjailBackend._detect_cgroup_v2() is True
+
+
+def test_detect_cgroup_v2_user_slice_must_be_writable():
+    """For uid 1000, detection is False when os.access denies and True when it grants."""
+
+    def fake_exists(self):
+        path = str(self)
+        return path == '/sys/fs/cgroup' or path.endswith('cgroup.controllers') or 'user.slice' in path
+
+    with (
+        mock.patch('os.getuid', return_value=1000),
+        mock.patch.object(pathlib.Path, 'exists', fake_exists),
+        mock.patch('os.access', return_value=False),
+    ):
+        assert NsjailBackend._detect_cgroup_v2() is False
+
+    with (
+        mock.patch('os.getuid', return_value=1000),
+        mock.patch.object(pathlib.Path, 'exists', fake_exists),
+        mock.patch('os.access', return_value=True),
+    ):
+        assert NsjailBackend._detect_cgroup_v2() is True
+
+
+# ── cleanup_orphaned_containers ───────────────────────────────────────
+
+@pytest.mark.anyio
+async def test_cleanup_orphaned_removes_old_sessions(backend, tmp_base):
+    """Another instance's session dir is removed; the current instance's dir is kept."""
+    tmp_base.mkdir(parents=True, exist_ok=True)
+
+    # One dir from a different instance, one from the current instance.
+    stale = tmp_base / 'oldinst_sess1_abc'
+    live = tmp_base / 'test123_sess2_def'
+    for session_dir in (stale, live):
+        (session_dir / 'workspace').mkdir(parents=True)
+
+    kill_stub = mock.patch.object(
+        backend, '_kill_session_processes', new_callable=mock.AsyncMock
+    )
+    with kill_stub:
+        await backend.cleanup_orphaned_containers('test123')
+
+    assert not stale.exists()
+    assert live.exists()
+
+
+# ── output clipping ──────────────────────────────────────────────────
+
+def test_clip_captured_bytes_within_limit():
+    data = b'hello world'
+    result = NsjailBackend._clip_captured_bytes(data, len(data))  # 2nd arg presumably the total size -- TODO confirm
+    assert result == 'hello world'  # bytes come back decoded and unmodified
+
+
+def test_clip_captured_bytes_exceeds_limit():
+    data = b'hello'
+    result = NsjailBackend._clip_captured_bytes(data, 2_000_000, limit=1_000_000)  # 2nd arg exceeds limit
+    assert 'clipped' in result  # a clipping notice is expected in the returned text
+    assert '1000000' in result  # ...and it must mention the limit value
diff --git a/tests/box/test_skill_store.py b/tests/box/test_skill_store.py
new file mode 100644
index 0000000..dcba96f
--- /dev/null
+++ b/tests/box/test_skill_store.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import io
+import zipfile
+
+from langbot_plugin.box.skill_store import BoxSkillStore
+
+
+def _skill_zip(name: str = 'demo') -> bytes:  # zip bytes: <name>/SKILL.md + <name>/notes.txt
+    manifest = (
+        '---\n'
+        f'name: {name}\n'
+        f'display_name: {name.title()}\n'
+        'description: Demo skill\n'
+        '---\n\n'
+        'Use this skill for tests.\n'
+    )
+    archive = io.BytesIO()
+    with zipfile.ZipFile(archive, 'w') as zf:
+        zf.writestr(f'{name}/SKILL.md', manifest)
+        zf.writestr(f'{name}/notes.txt', 'hello')
+    return archive.getvalue()
+
+
+def _nested_skill_zip() -> bytes:
+    """Return zip bytes with alpha and beta skills under ``repo/packages/``."""
+    # Two sibling SKILL.md manifests, each in its own package directory.
+    alpha = '---\nname: alpha\ndisplay_name: Alpha\n---\n\nAlpha instructions.\n'
+    beta = '---\nname: beta\ndisplay_name: Beta\n---\n\nBeta instructions.\n'
+
+    archive = io.BytesIO()
+    with zipfile.ZipFile(archive, 'w') as zf:
+        zf.writestr('repo/packages/alpha/SKILL.md', alpha)
+        zf.writestr('repo/packages/beta/SKILL.md', beta)
+
+    return archive.getvalue()
+
+
+def test_skill_store_installs_zip_under_configured_relative_skills_root(tmp_path):
+    """Upload lands under <host_root>/custom-skills; its files list, read and write."""
+    local_cfg = {'host_root': str(tmp_path), 'skills_root': 'custom-skills'}
+    store = BoxSkillStore({'local': local_cfg})
+    expected_root = str(tmp_path / 'custom-skills' / 'demo-upload')
+
+    preview = store.preview_zip_upload(file_bytes=_skill_zip(), filename='demo.zip')
+    assert preview[0]['package_root'] == expected_root
+
+    installed = store.install_zip_upload(file_bytes=_skill_zip(), filename='demo.zip')
+    first = installed[0]
+    assert first['name'] == 'demo'
+    assert first['package_root'] == expected_root
+
+    listing = store.list_skill_files('demo')
+    names = {entry['name'] for entry in listing['entries']}
+    assert names == {'SKILL.md', 'notes.txt'}
+
+    assert store.read_skill_file('demo', 'notes.txt')['content'] == 'hello'
+
+    # A write must be visible to the next read.
+    store.write_skill_file('demo', 'notes.txt', 'updated')
+    assert store.read_skill_file('demo', 'notes.txt')['content'] == 'updated'
+
+
+def test_skill_store_supports_source_subdir_before_selecting_candidates(tmp_path):
+    """source_subdir narrows the candidates before source_paths selects among them."""
+    local_cfg = {'host_root': str(tmp_path), 'skills_root': 'skills'}
+    store = BoxSkillStore({'local': local_cfg})
+
+    # Preview relative to 'packages': both nested skills are expected as candidates.
+    candidates = store.preview_zip_upload(
+        file_bytes=_nested_skill_zip(),
+        filename='repo.zip',
+        source_subdir='packages',
+    )
+    found = [skill['source_path'] for skill in candidates]
+    assert found == ['alpha', 'beta']
+
+    # Install only 'beta' from the same subdir.
+    installed = store.install_zip_upload(
+        file_bytes=_nested_skill_zip(),
+        filename='repo.zip',
+        source_subdir='packages',
+        source_paths=['beta'],
+    )
+    names = [skill['name'] for skill in installed]
+    assert names == ['beta']
+    expected_root = tmp_path / 'skills' / 'repo-beta-upload'
+    assert installed[0]['package_root'] == str(expected_root)