diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 34f89f572..c33684012 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -15,14 +15,10 @@ on: branches: - master - develop - paths: - - 'src/langbot/**' - - 'tests/**' - - '.github/workflows/run-tests.yml' - - 'pyproject.toml' - - 'uv.lock' - - 'run_tests.sh' - - 'scripts/test-*.sh' + - 'feat/**' + # No path filter on push: every push to the branches above runs the + # full unit-test suite. feat/** branches in particular must be tested + # on every push (they accumulate large changes before a PR exists). jobs: test: diff --git a/README_CN.md b/README_CN.md index c735fcc17..54b002800 100644 --- a/README_CN.md +++ b/README_CN.md @@ -25,7 +25,7 @@ 文档APICloud | -插件市场 | +扩展市场路线图 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index d3ba8ad90..3fccee390 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -18,6 +18,40 @@ services: networks: - langbot_network + # The Box sandbox runtime is optional. It is only started when you run + # ``docker compose --profile box up`` (or ``docker compose --profile all + # up``). With Box off, LangBot keeps the dashboard / skills list visible + # (read-only) but disables sandbox tools, skill add/edit and stdio MCP — + # set ``box.enabled: false`` in ``data/config.yaml`` (or + # ``BOX__ENABLED=false`` in the langbot service env below) to match. + langbot_box: + image: rockchin/langbot:latest + container_name: langbot_box + profiles: ["box", "all"] + volumes: + # Keep the source and target path identical because langbot_box uses the + # host Docker socket to create sandbox containers. Override + # LANGBOT_BOX_ROOT with an absolute path if you do not want the default. + - ${LANGBOT_BOX_ROOT:-${PWD}/data/box}:${LANGBOT_BOX_ROOT:-${PWD}/data/box} + # Mount container runtime socket for Box sandbox backend. 
+ # Uncomment the one that matches your container runtime: + # - /var/run/podman/podman.sock:/var/run/podman/podman.sock # Podman + - /var/run/docker.sock:/var/run/docker.sock # Docker + restart: on-failure + environment: + - TZ=Asia/Shanghai + # The Box runtime does NOT read box.local.* from config.yaml or env; it + # receives its configuration from LangBot via the INIT RPC action. + # Do not add LANGBOT_BOX_* / BOX__* here — they would be silently ignored. + # Launched through the same CLI entry point as the plugin runtime + # (`langbot_plugin.cli.__init__ <subcommand>`). WebSocket is the default + # control transport — mirrors `rt`, which also runs with no flag. Pass + # `-s` / `--stdio-control` only for the stdio mode LangBot uses outside + # containers. + command: ["uv", "run", "--no-sync", "-m", "langbot_plugin.cli.__init__", "box"] + networks: + - langbot_network + langbot: image: rockchin/langbot:latest container_name: langbot @@ -26,6 +60,13 @@ services: restart: on-failure environment: - TZ=Asia/Shanghai + # Unified env-override convention: SECTION__SUBSECTION__KEY overrides the + # matching config.yaml field (see LoadConfigStage). These map onto + # box.local.* and are forwarded to the Box runtime via INIT RPC. 
+ - BOX__LOCAL__HOST_ROOT=${LANGBOT_BOX_ROOT:-${PWD}/data/box} + - BOX__LOCAL__DEFAULT_WORKSPACE=default + - BOX__LOCAL__SKILLS_ROOT=skills + - BOX__LOCAL__ALLOWED_MOUNT_ROOTS=${LANGBOT_BOX_ROOT:-${PWD}/data/box} ports: - 5300:5300 # For web ui and webhook callback - 2280-2285:2280-2285 # For platform reverse connection @@ -34,4 +75,4 @@ services: networks: langbot_network: - driver: bridge \ No newline at end of file + driver: bridge diff --git a/docs/review/box-architecture.md b/docs/review/box-architecture.md new file mode 100644 index 000000000..7a7940a6a --- /dev/null +++ b/docs/review/box-architecture.md @@ -0,0 +1,594 @@ +# Box 系统架构深度分析 + +> 更新日期: 2026-05-19 +> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk) +> 相关文档: [问题清单](./box-issues.md) | [Session 作用域](./box-session-scope.md) | [Runtime 对比](./box-vs-plugin-runtime.md) | [测试覆盖](./box-test-coverage.md) | [toB 分析](./box-tob-analysis.md) + +--- + +## 1. 全局架构 + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ LangBot 主进程 │ +│ │ +│ LocalAgentRunner ──> ToolManager ──> NativeToolLoader │ +│ │ │ │ │ +│ │ │ exec / read / write / edit │ +│ │ │ glob / grep │ +│ │ │ │ +│ │ ├──> MCPLoader ──> BoxStdioSession │ +│ │ │ (shared 容器, 多 process) │ +│ │ │ │ +│ │ ├──> SkillToolLoader (activate 工具) │ +│ │ │ │ +│ │ ├──> SkillAuthoringToolLoader │ +│ │ │ │ +│ │ └──> PluginToolLoader │ +│ │ │ +│ BoxService (门面) │ +│ ├─ Profile 管理 (locked 字段) │ +│ ├─ Host mount 校验 (allowed_mount_roots) │ +│ ├─ Workspace quota 检查 │ +│ ├─ 输出截断 (head+tail) │ +│ ├─ Session ID 模板解析 (resolve_box_session_id) │ +│ ├─ 技能挂载组装 (build_skill_extra_mounts) │ +│ ├─ 重连循环 (_reconnect_loop, 指数退避) │ +│ └─ BoxRuntimeConnector │ +│ ├─ 心跳 loop (20s ping) │ +│ └─ ActionRPCBoxClient │ +│ │ Action RPC (stdio 或 WebSocket) │ +│ │ +│ SkillManager (skill_mgr) │ +│ └─ 从 Box runtime 拉取 skills, 不可用时回落 data/skills │ +└──────────────────────────────────────────────────────────────────┘ + │ + ▼ 
+┌──────────────────────────────────────────────────────────────────┐ +│ Box Runtime 进程 (SDK 侧) │ +│ │ +│ BoxServerHandler (Action RPC 处理, INIT 配置注入) │ +│ │ │ +│ BoxRuntime (session 管理 / 进程生命周期 / TTL reaper) │ +│ │ └─ session.managed_processes: dict[pid, _ManagedProcess] +│ │ │ +│ Backend (启动时根据 box.backend 配置选择): │ +│ DockerBackend ──┐ │ +│ PodmanBackend ──┤── CLISandboxBackend │ +│ NsjailBackend ──┘ (本地 CLI 或 fallback 到容器内 CLI) │ +│ E2BBackend (云沙箱, 需要 E2B_API_KEY) │ +│ │ +│ BoxSkillStore │ +│ ├─ list / get / create / update / delete │ +│ ├─ scan_skill_directory / read_skill_file / write_skill_file │ +│ └─ preview_skill_zip / install_skill_zip (zip 或 GitHub) │ +│ │ +│ aiohttp 单端口服务 (默认 :5410): │ +│ /rpc/ws — Action RPC │ +│ /v1/sessions/{id}/managed-process/ws — 默认 process │ +│ /v1/sessions/{id}/managed-process/{pid}/ws — 指定 process │ +└──────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ 容器 / 沙箱 (Docker/Podman 容器, nsjail sandbox, 或 E2B 远程沙箱) │ +│ - 隔离文件系统 / 网络 / PID 命名空间 │ +│ - 资源限制 (CPU, 内存, PID 数, 可选 workspace 配额) │ +│ - 主挂载 (host_path → mount_path) + 任意条 extra_mounts │ +│ └─ Skills 通过 extra_mounts 挂在 /workspace/.skills/ │ +│ - exec: 用户命令在此执行 │ +│ - managed process: 多个长驻进程并存 (MCP Server / 自定义服务) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +**核心设计原则**: +- Box Runtime 作为独立进程运行,通过 Action RPC 与 LangBot 主进程通信,两者复用 SDK 的 IO 层(Handler → Connection → Controller) +- 一个 session_id 对应一个容器/沙箱实例。同一 session 内可并存多条 mount 与多个 managed process +- Skill / 默认 exec / MCP Server 共享同一个 session 容器(详见 [box-session-scope.md](./box-session-scope.md)) + +--- + +## 2. 
LangBot 侧模块 + +### 2.1 BoxService (`pkg/box/service.py`, 722 行) + +应用层门面,协调 Profile、安全校验、配额、连接、Skill 挂载与 Session 模板: + +主要公开方法(按定义顺序): + +``` +BoxService + ├─ initialize() 连接 Box Runtime + 默认 workspace 准备 + ├─ _on_runtime_disconnect(connector) 触发重连 + ├─ _reconnect_loop(connector) 指数退避重连 + ├─ available (property) 连接状态 + │ + ├─ resolve_box_session_id(query) 从 pipeline 模板解析 session_id + ├─ build_skill_extra_mounts(query) 组装 pipeline-bound skill 的挂载列表 + │ + ├─ execute_tool(parameters, query) Agent 调用 exec 时的入口 + │ ├─ _apply_profile / build_spec + │ ├─ _validate_host_mount + │ ├─ _enforce_workspace_quota (phase=pre) + │ ├─ client.execute(spec) + │ ├─ _enforce_workspace_quota (phase=post) + │ └─ _truncate (stdout/stderr) + │ + ├─ execute_spec_payload(spec_payload, ...) 内部入口(其他 loader 调用) + ├─ create_session(spec_payload, ...) 显式创建 session + ├─ start_managed_process(session_id, ...) 启动 managed process + ├─ get_managed_process(session_id, pid) 查询进程状态(pid 默认 'default') + ├─ stop_managed_process(session_id, pid) 单独停止某个 managed process + ├─ get_managed_process_websocket_url(...) 
返回 WS attach URL + │ + ├─ list_skills() / get_skill(name) Skill 元数据 + ├─ create_skill / update_skill / delete_skill Skill CRUD + ├─ scan_skill_directory(path) 扫描目录 + ├─ list_skill_files / read_skill_file / write_skill_file + ├─ preview_skill_zip / install_skill_zip zip / GitHub 安装 + │ + ├─ shutdown() / dispose() 清理:RPC SHUTDOWN + 进程终止 + ├─ get_status() / get_sessions() / get_recent_errors() + └─ get_system_guidance() LLM 系统提示 +``` + +**Profile 系统**: 4 个内置 Profile(`default` / `offline_readonly` / `network_basic` / `network_extended`),`locked` frozenset 字段不可被 LLM 覆盖。参数合并顺序:Profile defaults → LLM 请求参数 → locked 强制值。 + +**输出截断**: 默认 4000 字符上限,保留前 60% + 后 40%,中间插入 `[...truncated...]`。 + +**Skill 挂载合并**: `execute_tool()` 调用时,`build_skill_extra_mounts(query)` 会把当前 pipeline-bound 的所有 skill 的 `package_root` 作为 `extra_mounts` 加入 BoxSpec,挂在 `/workspace/.skills/`。LLM 通过 `activate` 工具显式激活某个 skill 后,工具调用才允许引用这个 skill 的虚拟路径。 + +### 2.2 BoxRuntimeConnector (`pkg/box/connector.py`, 357 行) + +管理与 Box Runtime 的通信连接: + +- **本地 stdio**: Unix/macOS 默认路径,fork `python -m langbot_plugin.cli.__init__ box -s --ws-control-port {port}` 子进程(与 plugin runtime 统一走 `lbp` CLI 入口) +- **本地 subprocess + WS**: Windows 本地(asyncio ProactorEventLoop 不支持 stdio pipe) +- **远程 WebSocket**: Docker 部署 / `box.runtime.endpoint` 显式配置时,连接 `ws://{host}:{port}/rpc/ws` +- **同步等待**: `asyncio.Event` + `wait_for(timeout=30s)` 模式确认连接 +- **心跳**: `_heartbeat_loop()` 每 20s 调用 `ping()`,失败仅 DEBUG 日志(断开检测靠 connection close) +- **重连**: `runtime_disconnect_callback` 由 BoxService 提供,触发 `_reconnect_loop` +- **INIT 注入**: 连接建立后立即下发当前 `box.*` 配置子树(剔除 `runtime` 私有字段),Runtime 据此初始化 backend + +> **历史改进**: 2026-04-16 版本本文档曾列 P0 「Box 无心跳 / 无重连」,已修复(commit `2dfd9d5d`、`c6882cf`、`5029d9c` 等)。 + +### 2.3 BoxWorkspaceSession 工具 (`pkg/box/workspace.py`, 413 行) + +此文件目前提供两类能力: + +1. 
**路径与命令重写工具函数** — `normalize_host_path` / `rewrite_mounted_path` / `unwrap_venv_path` / `rewrite_venv_command` / `infer_workspace_host_path`,被 MCP loader 与 Skill 路径解析共用。 +2. **`BoxWorkspaceSession`** — 围绕 BoxService 的轻量包装,专供 MCP-in-Box 场景使用(管理一个共享 session 的 session_id、构建挂载 payload、stage host 文件到共享 workspace)。 + +**变化点**: 早期 Skill exec 会为每个 skill 创建独立 BoxWorkspaceSession(独占 session);当前实现已转为 `extra_mounts` 模式,Skill 不再独占容器,只追加挂载。这部分 wrapping 逻辑已从 native loader 移除。 + +### 2.4 policy.py (`pkg/box/policy.py`, 98 行) — 仍是死代码 + +三层安全策略设计(`SandboxPolicy` / `ToolPolicy` / `ElevatedPolicy`),全项目无任何导入或调用。详见 [问题清单 #1](./box-issues.md)。 + +### 2.5 SkillManager (`pkg/skill/manager.py`, 186 行) + +``` +SkillManager + ├─ initialize() 调用 reload_skills() + ├─ reload_skills() 先从 Box runtime list_skills(), + │ 不可用则回落 data/skills/ 扫描 + ├─ refresh_skill_from_disk() 单 skill 重新加载 + ├─ get_skill_by_name(name) + └─ get_managed_skills_root() 返回 Box 视角的 skills_root 路径 +``` + +skill 元数据通过 `parse_frontmatter` 解析 `SKILL.md` 头部(`name` / `description` / `instructions`),不再做整体扫描的代价(典型 < 50 个)。 + +### 2.6 Skill activation (`pkg/skill/activation.py`, 33 行) + Skill loader 辅助 + +历史上 skill 通过 LLM 在文本中输出 `[ACTIVATE_SKILL:name]` 标记激活;当前已改为 **Tool Call 机制**: + +- `SkillToolLoader` (`pkg/provider/tools/loaders/skill.py`, 157 行) 暴露 `activate` 工具,参数为 skill 名 +- 工具实现调用 `register_activated_skill(query, skill_data)`,将激活态写入 `query.variables['_activated_skills']` +- 这种 KV-cache-friendly 模式对齐 Claude Code 设计;详见 [box-session-scope.md §4.3](./box-session-scope.md) 的 Tool Call 描述 + +`activation.py` 现仅保留对外辅助函数(pipeline 层调用 loader 的 `register_activated_skill`)。 + +--- + +## 3. SDK 侧模块 + +### 3.1 BoxRuntime (`box/runtime.py`, 599 行) + +核心编排器,管理 session 生命周期与 backend 调度: + +``` +Session 生命周期: + + Client EXEC / CREATE_SESSION + │ + ▼ + _get_or_create_session(spec) + ├─ _reap_expired_sessions_locked() 清理 TTL 过期 session + ├─ 已存在? → _assert_session_compatible() → 复用 + ├─ Backend session 失踪? → 重建 (commit c6882cf) + └─ 新建? 
→ backend.start_session(spec) → 创建容器 + │ └─ 应用 spec.extra_mounts (多挂载) + ▼ + execute(spec) + ├─ 获取 session lock (每 session 独立) + ├─ backend.exec(session, spec) 在容器中执行命令 + ├─ 更新 last_used_at + └─ 超时? → 销毁 session + │ + ▼ + Session 保持存活直到: + ├─ TTL 过期 (默认 300s,下次操作时清理) + ├─ 执行超时 (自动销毁) + ├─ 客户端 DELETE_SESSION + └─ SHUTDOWN +``` + +**关键设计**: +- 每 session 有独立 `asyncio.Lock`,同一 session 内的命令串行执行 +- 每 session 维护 `managed_processes: dict[process_id, _ManagedProcess]`,支持多个长驻进程并存(MCP / 自定义) +- 全局 `_lock` 保护 `_sessions` dict 的读写 +- 兼容性检查:比较核心 spec 字段,`image` 字段对不支持自定义镜像的 backend(nsjail/E2B)会跳过 + +**Backend 选择 (`_select_backend`)**: 优先级 +1. 显式 `box.backend` 配置(`docker` / `nsjail` / `e2b`) +2. `local` (默认) → Docker / Podman / nsjail CLI 顺序探测 +3. `get_status` 调用时若当前 backend 不可用,会尝试重新选择 (commit `e5617c7`) + +### 3.2 Backend 系统 + +#### CLISandboxBackend (`box/backend.py`, 411 行) + +Docker / Podman 公共基类: + +``` +start_session(spec): + 1. validate_sandbox_security(spec) + 2. docker/podman run -d --rm --name + --network none (可选) + --cpus/--memory/--pids-limit + --read-only + --tmpfs /tmp + -v :: 主挂载 + -v ::.. 额外挂载 (extra_mounts) + sh -lc 'while true; do sleep 3600; done' + 3. 
返回 BoxSessionInfo + +exec(session, spec): + docker/podman exec -e KEY=VAL + sh -lc 'mkdir -p && cd && ' + +start_managed_process(session, spec): + docker/podman exec -i + sh -lc 'mkdir -p && cd && exec ' + 返回 asyncio.subprocess.Process (stdin/stdout PIPE) +``` + +容器以 idle 进程启动,实际命令通过 `docker exec` 执行。`--rm` 确保容器退出时自动清理。 + +**Windows 支持**: backend 内对 Windows 路径处理与 subprocess 调用做了适配(commit `120817a`)。 + +**孤儿清理**: 启动时枚举 `langbot.box=true` 标签的容器,instance_id 不匹配的强制删除。 + +#### NsjailBackend (`box/nsjail_backend.py`, 552 行) + +轻量级 Linux 沙箱(无容器引擎依赖): + +- 使用 namespace 隔离(user/mount/pid/ipc/uts/cgroup/net) +- 挂载宿主 `/usr`/`/lib`/`/bin`/`/sbin` 只读 + 选定 `/etc` 条目 +- 每 session 创建独立目录(workspace/tmp/home) +- 资源限制: cgroup v2 优先,fallback 到 rlimit +- **CLI 兼容**: 通过 `shutil.which(self._nsjail_bin)` 检测系统安装版 nsjail;不存在时再尝试容器内 nsjail(commit `686fcc0`、`feed530`) +- **无自定义镜像**: 使用宿主 OS,`image` 字段固定为 `'host'`,兼容性检查跳过 image + +#### E2BBackend (`box/e2b_backend.py`, 429 行) + +云沙箱后端(commit `75b547f` 引入): + +- 通过 `e2b` SDK 与 E2B 平台通信 +- 配置:`box.e2b.api_key` / `api_url` / `template` +- 支持 `extra_mounts`(commit `0fea9b1` 同步上传文件) +- 无本地容器引擎依赖,适合无 Docker 的部署或 SaaS 多租户场景 +- 不支持自定义 image 字段,由 template 控制 + +### 3.3 Server (`box/server.py`, 508 行) + +单端口 aiohttp 服务(默认 5410),通过路径区分(commit `8c71ec5` 合并端口): + +1. **Action RPC** (`/rpc/ws`): `BoxServerHandler` 处理所有 action,包括 `INIT` 配置注入、skill store 操作等 +2. 
**WS Relay** (`/v1/sessions/{id}/managed-process/ws` 与 `/v1/sessions/{id}/managed-process/{pid}/ws`): 双向桥接 WebSocket ↔ 指定 managed process stdin/stdout + +stdio 模式同样会在 5410 启动 aiohttp,专门承担 managed process attach;Action RPC 走 stdin/stdout。 + +### 3.4 Client (`box/client.py`, 377 行) + +`ActionRPCBoxClient` 封装 `Handler.call_action()` 调用: + +- 25+ 方法对应 25+ 个 RPC action(exec / session / managed-process / skill / status / shutdown) +- 错误还原: `_translate_action_error()` 通过字符串前缀匹配还原 SDK 侧异常类型 +- `execute()` timeout = 300s,其他默认 15s +- `BoxRuntimeClient` 是 ABC,供后续可能的非 RPC 实现复用 + +包级别 `__init__.py` 显式导出:`BoxRuntimeClient`、`ActionRPCBoxClient`(commit `df9c722`)。 + +### 3.5 Actions (`box/actions.py`, 34 行) + +`LangBotToBoxAction` 枚举共定义 **25 个** action: + +| 类别 | Actions | +|------|---------| +| 控制 | `INIT`、`HEALTH`、`STATUS`、`GET_BACKEND_INFO`、`SHUTDOWN` | +| 执行 | `EXEC` | +| Session | `CREATE_SESSION` / `GET_SESSION` / `GET_SESSIONS` / `DELETE_SESSION` | +| Managed Process | `START_MANAGED_PROCESS` / `GET_MANAGED_PROCESS` / `STOP_MANAGED_PROCESS` | +| Skill | `LIST_SKILLS` / `GET_SKILL` / `CREATE_SKILL` / `UPDATE_SKILL` / `DELETE_SKILL` / `SCAN_SKILL_DIRECTORY` / `LIST_SKILL_FILES` / `READ_SKILL_FILE` / `WRITE_SKILL_FILE` / `PREVIEW_SKILL_ZIP` / `INSTALL_SKILL_ZIP` | + +### 3.6 Models (`box/models.py`, 331 行) + +核心数据模型: + +| 模型 | 用途 | +|------|------| +| `BoxNetworkMode` | `OFF` / `ON` | +| `BoxExecutionStatus` | `COMPLETED` / `TIMED_OUT` | +| `BoxHostMountMode` | `NONE` / `READ_ONLY` / `READ_WRITE` | +| `BoxManagedProcessStatus` | `RUNNING` / `EXITED` | +| `BoxMountSpec` | 单条挂载(host_path/mount_path/mode)— **新增** | +| `BoxSpec` | 执行请求;新增 `extra_mounts: list[BoxMountSpec]`、`persistent`、`workspace_quota_mb` | +| `BoxProfile` | 4 个内置 Profile + `locked` frozenset | +| `BoxSessionInfo` | Session 状态(含 backend_name/created_at/last_used_at) | +| `BoxManagedProcessSpec` | 长驻进程参数(process_id/command/args/env/cwd) | +| `BoxManagedProcessInfo` | 进程状态(status/exit_code/stderr_preview/attached) 
| +| `BoxExecutionResult` | 执行结果(status/exit_code/stdout/stderr/duration_ms) | + +`BoxSpec` 校验器: `workdir` 默认继承 `mount_path`;`host_path` 支持 POSIX 和 Windows 路径;设置 `host_path` 时 `workdir` 必须在 `mount_path` 下。 + +### 3.7 BoxSkillStore (`box/skill_store.py`, 647 行) + +新增模块(commit `4ab3502`),把 skill 持久化收归 Box runtime: + +``` +BoxSkillStore + ├─ list_skills() / get_skill(name) + ├─ create_skill(data) / update_skill(name, data) / delete_skill(name) + ├─ scan_skill_directory(path) 扫描目录返回候选 skill 包列表 + ├─ list_skill_files(name, path) 浏览 skill 内文件树 + ├─ read_skill_file(name, path) / write_skill_file(name, path, content) + ├─ preview_skill_zip(zip_bytes, ...) 不落盘预览 zip 内容 + └─ install_skill_zip(zip_bytes, ...) 解压、校验、复制到 skills_root + └─ 支持 source_subdir / target_suffix(commit 1aa043f) +``` + +GitHub 安装路径:HTTP 层(`api/http/service/skill.py`)先 `git clone` 拉取,再走 `install_skill_zip` 或 directory 路径。Skill 文件存放于 `box.local.skills_root`(默认 `skills`,相对 `host_root`),容器内对应 `/workspace/.skills/`。 + +### 3.8 Security (`box/security.py`, 52 行) + +`validate_sandbox_security()`: 黑名单校验 host_path,阻止挂载 `/etc`/`/proc`/`/sys`/`/dev`/`/root`/`/boot` 及 Docker/Podman socket。 + +**已知缺陷**: 根路径 `/` 未拦截,用户 home 目录未拦截,是 denylist 而非 allowlist 策略。详见 [问题清单 #3](./box-issues.md)。 + +### 3.9 Errors (`box/errors.py`, 33 行) + +| 异常类型 | 含义 | +|----------|------| +| `BoxError` | 基类 | +| `BoxValidationError` | spec/参数校验失败 | +| `BoxBackendUnavailableError` | 无可用 backend | +| `BoxRuntimeUnavailableError` | Runtime 服务不可用 | +| `BoxSessionConflictError` | session 已存在但 spec 不兼容 | +| `BoxSessionNotFoundError` | session 不存在 | +| `BoxManagedProcessConflictError` | session 已有同名 process | +| `BoxManagedProcessNotFoundError` | process 不存在 | + +--- + +## 4. 
工具系统集成 + +### 4.1 ToolManager 编排 (`toolmgr.py`) + +``` +ToolManager.initialize() + ├─ NativeToolLoader (exec / read / write / edit / glob / grep) + ├─ PluginToolLoader (插件工具) + ├─ MCPLoader (MCP Server 工具) + ├─ SkillToolLoader (activate 工具 — Tool Call 激活) + └─ SkillAuthoringToolLoader (Skill CRUD) + +工具调用优先级: native → plugin → mcp → skill → skill_authoring +``` + +### 4.2 Native Tools (`native.py`, 846 行) + +| 工具 | 是否在 Box 中执行 | 是否访问宿主文件系统 | +|------|:---:|:---:| +| `exec` | 是 | 否 | +| `read` | **否** | **是** — 直接 `open()` 宿主文件 | +| `write` | **否** | **是** — 直接 `open()` 宿主文件 | +| `edit` | **否** | **是** — 直接 `open()` 宿主文件 | +| `glob` | **否** | **是** — 直接遍历宿主目录 | +| `grep` | **否** | **是** — 直接读宿主文件 | + +**沙箱边界不对称**: 这是刻意的设计权衡 — `read`/`write`/`edit`/`glob`/`grep` 绕过沙箱以获得性能(避免容器 I/O 开销与跨进程拷贝),但意味着 LLM 可以直接读写 `allowed_mount_roots` 下任何文件。Skill 路径经 `_resolve_host_path()` 重写,禁止穿越 `package_root`。 + +**exec 的 Skill 分支**: 命令中引用 `/workspace/.skills/` 的 skill 时: +1. 验证 skill 已激活 +2. 单次 exec 只能引用一个 skill 包 +3. 若 skill 是 Python 项目(有 `requirements.txt` 或 `pyproject.toml`),命令会被 venv bootstrap 包裹(在 skill 挂载点内创建 `.venv`) +4. 调用 `box_service.execute_tool()` → 走默认 session_id 与已组装好的 `extra_mounts`,**不再为每 skill 起独立 session** + +### 4.3 MCP-in-Box (`mcp_stdio.py`, 354 行) + +`BoxStdioSessionRuntime` 让 MCP stdio 服务器在 Box 容器中运行,**共享 session、多 process**模式(commit `529088e`): + +``` +initialize() + 1. 复用/创建共享 session (session_id = _build_box_session_id()) + - persistent=True,长期保持 + 2. workspace.execute_raw(install_cmd) 安装依赖 (可选) + 3. 将每个 MCP server 文件 stage 到 /workspace/.mcp// + 4. workspace.start_managed_process(process_id=) + 5. websocket_client(ws_url) 通过 WS relay 连接 + 6. ClientSession.initialize() MCP 协议握手 +``` + +配置 (`MCPServerBoxConfig`): `network='on'` (MCP 服务器通常需要网络),`host_path_mode='ro'` (默认只读),`startup_timeout_sec=120` (留时间给 pip install)。 + +每条 MCP server 是同一 session 中的一个 managed process,独立的 `process_id`、独立 attach URL,互不阻塞。 + +--- + +## 5. 
启动与生命周期 + +### 5.1 启动顺序 (`build_app.py`) + +``` +BuildAppStage.run(ap) + ├─ ... (persistence, models, sessions) ... + │ + ├─ BoxService(ap) + ├─ box_service.initialize() + │ └─ connector.initialize() + │ ├─ [stdio] fork box subprocess + │ ├─ [subprocess+WS] Windows 本地 + │ └─ [remote WS] connect URL + │ └─ 启动心跳 _heartbeat_task + ├─ ap.box_service = box_service + │ + ├─ ToolManager(ap) + ├─ tool_mgr.initialize() + │ ├─ NativeToolLoader (检查 box_service.available) + │ ├─ PluginToolLoader + │ ├─ MCPLoader (Box 可用时,stdio MCP 走沙箱) + │ └─ SkillAuthoringToolLoader + ├─ ap.tool_mgr = tool_mgr + │ + ├─ ... (platform, pipeline) ... + ├─ SkillManager.initialize() (从 Box runtime 加载 skill 列表) + └─ ... (RAG, HTTP, plugins) ... +``` + +BoxService 在 ToolManager **之前**初始化。ToolManager 创建 loader 时检查 `box_service.available`。 + +### 5.2 初始化失败处理 + +```python +try: + await self._runtime_connector.initialize() + self._available = True +except Exception as e: + self._available = False + logger.warning(f"Box runtime unavailable: {e}") +``` + +**静默降级**: Box 初始化失败不会阻止应用启动,仅导致 6 个 native tool、所有 Skill 工具和 MCP-in-Box 工具不暴露给 LLM。与 Plugin 的行为不同(Plugin 失败会抛异常)。 + +### 5.3 销毁流程 + +``` +app.dispose() + └─ box_service.dispose() + ├─ connector.dispose() + │ ├─ cancel _heartbeat_task + │ ├─ cancel _handler_task / _ctrl_task + │ └─ terminate subprocess (SIGTERM) + └─ loop.create_task(client.shutdown()) + └─ RPC SHUTDOWN → Box Runtime 清理所有容器 +``` + +Box 额外做了 RPC SHUTDOWN 通知 Runtime 主动清理容器,比 Plugin 的直接杀进程更安全。 + +--- + +## 6. 
配置 + +### config.yaml (重构后) + +```yaml +box: + enabled: true # 整个 Box 子系统的总开关。设为 false 时: + # - 不连接远程 Box runtime,不 fork 本地 stdio 子进程 + # - sandbox 工具 (exec/read/write/edit/glob/grep) 不暴露给 LLM + # - skill 添加/编辑 / GitHub 安装 / 文件写入全部拒绝 + # - stdio 模式的 MCP server 启动时报错(http/sse 模式不受影响) + # - skill 列表/读取保持只读可用 + # BOX__ENABLED 环境变量可覆盖(统一约定) + backend: 'local' # 'local' (探测) / 'docker' / 'nsjail' / 'e2b' + # 由 box.backend / BOX__BACKEND 选择后端 + runtime: + endpoint: '' # 外部 Runtime 的 WS 基地址 'ws://host:5410' + # 留空 = 本地自管 Runtime + local: + profile: 'default' + image: '' # 覆盖 profile 默认 image + host_root: './data/box' # 工作区挂载根,Docker 部署需绝对路径 + default_workspace: '' # 默认 '/default' + skills_root: 'skills' # Box 管理的 skill 包目录(相对 host_root) + allowed_mount_roots: # 默认 [''] + - './data/box' + - '/tmp' + workspace_quota_mb: null # 配额覆盖,null = 走 profile + e2b: + api_key: '' # 也可走 E2B_API_KEY 环境变量 + api_url: '' # 自托管 E2B 时填写 + template: '' # 默认 template ID +``` + +> **重大变更**: 较 2026-04-16 文档,配置结构完全重组(commit `eefdea4`)。原字段 `box.profile` / `box.runtime_url` / `box.shared_host_root` / `box.allowed_host_mount_roots` 全部迁入 `box.local.*` 子表,新增 `box.backend` 与 `box.e2b.*` 配置组。 + +### docker-compose.yaml + +`langbot_box` 服务受 compose profile 控制,默认 `docker compose up` **不会**启动它。需要 sandbox 时: + +```bash +docker compose --profile box up # 启动 langbot + langbot_box + plugin runtime +docker compose --profile all up # 同上 +docker compose up # 只起 langbot + plugin runtime (box 关闭) +``` + +若不起 `langbot_box`,需要同步在 `data/config.yaml` 中设 `box.enabled: false`(或 langbot 容器 env 加 `BOX__ENABLED=false`),否则 LangBot 会一直尝试连接不存在的 Box runtime 并报错。 + +```yaml +# langbot_box 的关键 volume +volumes: + - ${LANGBOT_BOX_ROOT}:${LANGBOT_BOX_ROOT} # 工作区挂载(源/目标同路径) + - /var/run/docker.sock:/var/run/docker.sock # Docker backend 复用宿主 docker +``` + +### 关闭/连接失败时的行为矩阵 + +`box.enabled = false` 与"启用但连接失败"在用户可观察行为上**完全一致**——都通过 `BoxService.available = False` 表达,只是 `get_status` 多返回 `enabled` 字段供前端区分文案。 + +| 消费方 | Box 可用 | Box 
不可用(disabled 或 failed) | +|---|---|---| +| native exec/read/write/edit/glob/grep 工具 | 暴露给 LLM | **不暴露** | +| `activate` / `register_skill` 工具 | 暴露给 LLM | **不暴露** | +| stdio MCP server | 在 Box 内启动 | **`_init_stdio_python_server` 抛 RuntimeError** 拒绝;不退化到宿主 stdio | +| http/sse MCP server | 正常 | 正常(不依赖 Box) | +| Skill 列表/读取 (`list_skills`/`get_skill`/`read_skill_file`) | 走 Box runtime | 走 LangBot 本地 `data/skills/` 只读 fallback | +| Skill 创建/编辑/安装/写文件 | 走 Box runtime | **HTTP 400** + 明确错误信息(`_require_box_for_write`) | +| Pipeline AI 配置中 `box-session-id-template` | 正常生效 | **前端 banner** 提示字段无效 | +| Pipeline 扩展页 `enable_all_skills` / 绑定 skill | 可编辑 | **前端禁用** + banner | +| 仪表盘 Box 状态卡片 | 绿点 / "已连接" | 灰点 / "已禁用"(disabled) 或 红点 / "已断开"(failed) | + +> 后端拒写的边界条件:如果 `ap.box_service` **完全没装**(老式 dev mode,没经过 BuildAppStage),`_require_box_for_write` 视作 no-op,保留 `data/skills/` 本地路径——以兼容历史测试与最小化设置。生产环境总会装 `ap.box_service`,因此该 fallback 不会被触发。 + +### Pipeline 配置 (templates/metadata/pipeline/ai.yaml) + +`local-agent.config.box-session-id-template` 控制 session 作用域,预设: + +- `{launcher_type}_{launcher_id}` — 每个会话 (推荐,默认) +- `{launcher_type}_{launcher_id}_{sender_id}` — 群聊每个用户 +- `{launcher_type}_{launcher_id}_{conversation_id}` — 每个对话上下文 +- `{query_id}` — 每条消息(完全隔离) + +详见 [box-session-scope.md](./box-session-scope.md)。 + +### REST API + +| 端点 | 方法 | 说明 | 前端 | +|------|------|------|:---:| +| `/api/v1/box/status` | GET | 可用性、Profile、后端信息 | ✅ 监控页 | +| `/api/v1/box/sessions` | GET | 活跃 session 列表 | ❌ | +| `/api/v1/box/errors` | GET | 最近 50 条错误 | ❌ | +| `/api/v1/skills` 等 | GET/POST/PUT/DELETE | Skill CRUD、文件浏览、zip/GitHub 安装、preview | ✅ Skill 管理页 | + +前端 `web/src/app/home/monitoring/components/overview-cards/SystemStatusCards.tsx` 已接入 `/api/v1/box/status`,展示 backend 名称、profile 与活跃 session 数。Sessions 与 errors API 仍未接入。 diff --git a/docs/review/box-issues.md b/docs/review/box-issues.md new file mode 100644 index 000000000..76a29e166 --- /dev/null +++ b/docs/review/box-issues.md @@ -0,0 +1,157 @@ 
+# Box 系统架构问题清单 + +> 更新日期: 2026-05-19 +> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk) + +--- + +## 已解决(自上一轮 review) + +下列原 P0/P1 项在最新分支已被修复,仅作记录: + +| 原编号 | 问题 | 处理 commit / 说明 | +|--------|------|---------------------| +| #3 | Box 无重连机制 | `_make_connection_callback` 已接入 `runtime_disconnect_callback`;`BoxService._reconnect_loop()` 实现指数退避重连 (`2dfd9d5d`、`c6882cf`) | +| #4 | Box 无心跳 | `BoxRuntimeConnector._heartbeat_loop()`,间隔 20s(沿用 Plugin 模式) | +| #10 | Windows 兼容 | connector 增加 Windows 分支 (subprocess + WS),backend 适配 Windows Docker (`120817a`、`fafb7a4`) | +| #12 | nsjail image 字段冲突 | `_assert_session_compatible()` 在不支持自定义镜像的 backend 跳过 image 字段 | +| #22 | 前端无 Box UI | 监控页 `SystemStatusCards.tsx` 已接入 `/api/v1/box/status`;Skill 管理页接入了全部 skill API(sessions/errors API 仍未接入) | + +--- + +## P0 — 合并前建议修复 + +### 1. policy.py 是死代码 + +- **位置**: `pkg/box/policy.py` (98 行) +- **现状**: `SandboxPolicy`、`ToolPolicy`、`ElevatedPolicy` 三个类已定义,但全项目无任何导入或调用 +- **影响**: 三层安全策略(沙箱模式 / 工具白名单 / 权限提升)完全未生效。当前实际策略仍是"Box 可用就暴露全部 6 个 native tool,不可用就全部隐藏" +- **建议**: 要么删除死代码,要么接入 NativeToolLoader 的工具暴露 / exec 调用链。如果短期不会接入,至少在 `pkg/box/__init__.py` 显式标注其状态 + +### 2. WebSocket relay 无认证 + +- **位置**: SDK `box/server.py` — Action RPC 路径 `/rpc/ws` 与 managed-process relay `/v1/sessions/{id}/managed-process/{pid}/ws` +- **现状**: 任何能访问 5410 端口的客户端都可以连接,attach 任意 session 的 managed process stdin/stdout,或直接发起 EXEC +- **影响**: 容器化 / Docker compose 部署中,若 Box runtime 端口外暴露,网络内的攻击者可直接控制沙箱 +- **建议**: 至少加 token 认证(INIT 时下发,WS 连接 query string 或 header 校验);多 process 后 attach 面更大,更不能裸奔 + +### 3. security.py 根路径未拦截 + +- **位置**: SDK `box/security.py` `BLOCKED_HOST_PATHS_POSIX` +- **现状**: 黑名单中没有 `/`,`host_path="/"` 可通过校验并挂载整个主机文件系统;用户 home 目录、`/var` 等也未拦截 +- **建议**: 将 `/` 加入黑名单,或改用白名单策略与 LangBot 侧 `allowed_mount_roots` 二次拦截 + +### 4. 
INIT 与 backend 初始化的竞态 + +- **位置**: SDK `box/runtime.py` `init()` 在握手后才下发实际配置;`backend` 在 INIT 之前可能已经按默认值实例化 +- **现状**: commit `5029d9c` 修复了 "init config before backend reuse" 的部分场景,但 backend 重新实例化时若有正在执行的 session,可能命中旧 backend +- **建议**: 整理 init/handshake 顺序——要么 INIT 完成前不接受任何业务 action,要么允许 backend 配置变更时显式清理现有 session + +--- + +## P1 — 合并后优先跟进 + +### 5. Session 数量无上限 + +- **位置**: SDK `box/runtime.py` `_get_or_create_session()` +- **现状**: `_sessions` dict 无容量限制,恶意或异常调用可创建无限 session +- **建议**: 加 `max_sessions` 配置项,达到上限时拒绝新建或按 LRU 清理 + +### 6. Quota 检查存在 TOCTOU + +- **位置**: `pkg/box/service.py` `_enforce_workspace_quota()` +- **现状**: 应用层先读磁盘大小再执行命令,两步之间有竞态窗口 +- **建议**: 短期用 Docker `--storage-opt size=` 做内核级限制;长期用 Redis 原子计数器做预留式配额 + +### 7. 全局锁持有期间执行慢操作 + +- **位置**: SDK `box/runtime.py` `_get_or_create_session()` — `self._lock` 下调用 `backend.start_session()` (即 `docker run` / `nsjail` 进程启动 / E2B `Sandbox.create`) +- **影响**: `docker run` 可能耗时数秒(含镜像拉取)、E2B 冷启动通常 > 1s,期间阻塞所有并发请求 +- **建议**: 在 `_lock` 下仅做状态检查和 session 注册,容器创建在锁外执行 + +### 8. Session 清理是机会性的 + +- **位置**: SDK `box/runtime.py` `_reap_expired_sessions_locked()` — 仅在 `_get_or_create_session()` 时调用 +- **影响**: 如果长时间无新 session 请求,过期 session(含容器)不会被清理 +- **建议**: 加一个独立的 `asyncio.create_task` 定时清理(如每 60s 一次) + +### 9. server.py 直接访问 runtime 私有字段 + +- **位置**: SDK `box/server.py` — managed-process WS handler 直接读 `runtime._sessions` +- **影响**: 绕过锁和封装,在并发场景下可能读到不一致状态 +- **建议**: 在 BoxRuntime 上增加公共方法(如 `get_session_managed_process(session_id, process_id)`) + +### 10. workspace quota 检查阻塞事件循环 + +- **位置**: `pkg/box/service.py` `_get_workspace_size_bytes()` — 使用同步 `os.scandir` 递归遍历 +- **影响**: 大工作区可能阻塞 asyncio event loop +- **建议**: 用 `asyncio.to_thread()` 包装,或用 `aiofiles` 异步扫描 + +### 11. 
extra_mounts 一旦容器创建即固定 + +- **位置**: SDK `box/runtime.py` 的兼容性检查;`pkg/box/service.py:build_skill_extra_mounts()` +- **现状**: Skill 挂载在容器创建时一次性写入;同一 session 后续 pipeline 切换 skill 列表时,新挂载不会生效(除非销毁重建) +- **影响**: 用户长时间共享 session 的场景下,新激活的 skill 可能挂不上 +- **建议**: 要么在创建时把 pipeline 绑定的所有 skill 都挂上(实际现状)+ 写入文档;要么变更挂载时强制销毁 session 重建(已被 commit `5029d9c` 部分覆盖,需校验) + +--- + +## P2 — 后续迭代 + +### 12. 重复的 `_is_path_under` 函数 + +- **位置**: `pkg/box/service.py` 行 30 附近 — 同名函数定义两次 +- **建议**: 删除重复定义 + +### 13. localagent.py 工具循环无迭代上限 + +- **位置**: `pkg/provider/runners/localagent.py` `while pending_tool_calls` 循环 +- **影响**: 恶意或混乱的 LLM 可无限产生 tool call,消耗资源 +- **建议**: 加 `max_tool_iterations` 配置项(如默认 50 次) + +### 14. localagent.py 中的死代码 + +- **位置**: `pkg/provider/runners/localagent.py:29-35` 附近 — 旧命名 `SANDBOX_EXEC_TOOL_NAME` 和 `SANDBOX_EXEC_SYSTEM_GUIDANCE` +- **现状**: 旧命名方案的遗留常量,从未被引用(实际使用 `EXEC_TOOL_NAME` from native.py) +- **建议**: 删除 + +### 15. @loader_class 装饰器未使用 + +- **位置**: `pkg/provider/tools/loader.py` — `preregistered_loaders` 列表和 `@loader_class` 装饰器 +- **现状**: 各 loader 的 `@loader_class` 多数被注释掉,ToolManager 手动实例化所有 loader +- **建议**: 要么启用装饰器自动注册,要么删除未用的机制 + +### 16. 工具名冲突风险 + +- **位置**: `pkg/provider/tools/toolmgr.py` `execute_func_call()` — 按优先级 native → plugin → mcp → skill → skill_authoring 分发 +- **影响**: 如果 plugin 或 MCP 有名为 `exec`/`read`/`write`/`edit`/`glob`/`grep`/`activate` 的工具,会被前序 loader 静默遮蔽 +- **建议**: 加命名空间前缀或冲突检测告警 + +### 17. client.py 反序列化不一致 + +- **位置**: SDK `box/client.py` — `execute()` 与其他方法对返回值的反序列化方式不统一(部分手动构造 model,部分用 `model_validate`) +- **建议**: 统一使用 `model_validate` + +### 18. 错误类型还原基于字符串前缀匹配 + +- **位置**: SDK `box/client.py` `_translate_action_error()` +- **影响**: 如果 server 端错误消息格式变化,client 会回退到通用 `BoxError`,丢失类型信息 +- **建议**: 在 ActionResponse 中增加结构化的错误类型字段(如 `error_code` 枚举) + +### 19. 
前端只用到了 status + +- **位置**: `web/src/app/home/monitoring/...` 已接入 `/api/v1/box/status` +- **现状**: `/api/v1/box/sessions` 与 `/api/v1/box/errors` 后端可用、前端未消费 +- **建议**: 在监控页或独立 Box 详情页展示活跃 session 列表与最近错误,提升运维体感 + +### 20. skill_store 测试覆盖偏薄 + +- **位置**: SDK `tests/box/test_skill_store.py` 仅 88 行 +- **现状**: 相对 `skill_store.py` 的 647 行实现,单测覆盖度不够;GitHub 安装路径、`source_subdir` / `target_suffix` 组合、损坏 zip 的错误处理等场景未覆盖 +- **建议**: 至少补到核心 path 覆盖(preview/install/list/file CRUD 各 2~3 个 case) + +### 21. 集成测试未进 CI + +- **位置**: LangBot `tests/integration_tests/box/test_box_integration.py`、`test_box_mcp_integration.py`,SDK 端的 E2B 真机测试 +- **现状**: 容器实际执行、E2B 真实 sandbox、Managed process WS attach 均仅本地能跑 +- **建议**: 加一个可选的 Docker-in-Docker CI stage,或在合并前手动跑 checklist diff --git a/docs/review/box-session-scope.md b/docs/review/box-session-scope.md new file mode 100644 index 000000000..8255a6a54 --- /dev/null +++ b/docs/review/box-session-scope.md @@ -0,0 +1,401 @@ +# Box Session Scope Design + +> Date: 2026-04-18 (last reviewed 2026-05-19) +> Branch: `feat/sandbox` (LangBot + langbot-plugin-sdk) +> Related: [Box Architecture](./box-architecture.md) | [Box vs Plugin Runtime](./box-vs-plugin-runtime.md) + +--- + +## 0. Implementation Status (2026-05-19) + +This document was authored as a design proposal. 
The current `feat/sandbox` branch +has shipped the design largely as written: + +| Item | Status | Notes | +|------|--------|-------| +| `BoxMountSpec` + `BoxSpec.extra_mounts` | ✅ Shipped | SDK `box/models.py` | +| Docker / nsjail / E2B backends apply extra mounts | ✅ Shipped | Last gap closed by SDK commit `0fea9b1` (E2B) | +| `box-session-id-template` in `local-agent` pipeline config | ✅ Shipped | `templates/metadata/pipeline/ai.yaml`, default `{launcher_type}_{launcher_id}` | +| `BoxService.resolve_box_session_id(query)` | ✅ Shipped | `pkg/box/service.py:166` | +| `BoxService.build_skill_extra_mounts(query)` | ✅ Shipped | `pkg/box/service.py:189` | +| Skill exec uses unified container + extra mounts | ✅ Shipped | `pkg/provider/tools/loaders/native.py` skill branch | +| MCP-in-Box uses shared persistent session, multi-process | ✅ Shipped (earlier than originally scoped) | SDK commit `529088e`, LangBot `mcp_stdio.py:_build_box_session_id` | +| `BoxManagedProcessSpec.process_id` + multi-process per session | ✅ Shipped | `BoxRuntime` keeps `managed_processes: dict[pid, _ManagedProcess]` | +| Per-tenant / quota integration with templates | ❌ Not started | See [box-tob-analysis.md](./box-tob-analysis.md) | + +The "Phase 2 deferred" note in §10 is **out of date** — MCP unification went in on +the same line. Pipeline-scoped (not user-scoped) MCP container is the realized +behavior: each pipeline's MCP servers share one `mcp-pipeline-{pipeline_uuid}` session (see §7), and +user exec sessions use the template-derived id. + +The remaining open work is multi-tenant overlays (tenant_id in session_id, +quota counters keyed by tenant), tracked in the toB analysis doc rather than here. + +--- + +## 1. Problems + +### 1.1 Default exec: per-message containers + +Currently, `BoxService.execute_tool()` sets `session_id = str(query.query_id)` — an +auto-incrementing integer per incoming message. Every user message creates a new sandbox +container. 
Dependencies installed and in-container state are lost between messages. + +### 1.2 Three isolated container pools + +Default exec, skills, and MCP servers each manage their own containers with +independent session IDs: + +| Path | Session ID | Container | +|--------------|-----------------------------------------------|-------------| +| Default exec | `str(query_id)` (per message) | Ephemeral | +| Skill exec | `skill-{launcher}_{id}-{skill_name}` | Per skill | +| MCP stdio | `mcp-{server_uuid}` | Per server | + +This means a single logical user interaction can spawn 3+ containers that cannot +share state, see each other's files, or reuse installed dependencies. + +### 1.3 Single bind mount limitation + +`BoxSpec` currently supports only **one** `host_path` → `mount_path` bind mount. +This prevents mounting both a default workspace and skill directories into the +same container. + +--- + +## 2. Concept Model + +``` +Platform Message + → Query (query_id: int, auto-increment, per message) + → Session (launcher_type + launcher_id, per chat window) + → Conversation (uuid, per dialogue context within a Session) +``` + +| Concept | Key | Example | Scope | +|---------------|-------------------------------------|----------------------------|------------------------------| +| Query | `query_id` | `42` | Single message | +| Session | `launcher_type` + `launcher_id` | `group_123456` | Chat window (group or PM) | +| Conversation | `conversation_id` (UUID) | `a1b2c3d4-...` | Dialogue context within a Session | +| Sender | `sender_id` | `789` | Individual user | + +Note: in a **group chat**, all users share the same Session (keyed by `group_id`). The +individual sender is tracked as `sender_id` but does not affect Session/Conversation routing. + +--- + +## 3. 
Target Scenarios + +| # | Scenario | Box Granularity | Desired `session_id` | +|----|--------------------------------|------------------------------------------|---------------------------------------------------------| +| 1 | Personal assistant | 1 Box per user, long-lived | `{launcher_type}_{launcher_id}` | +| 2 | Customer service | 1 Box per customer, cross-pipeline | `{launcher_type}_{launcher_id}` | +| 3 | Internal employee tool | 1 Box per employee | `{launcher_type}_{launcher_id}` | +| 4 | Group chat shared assistant | 1 Box per group | `{launcher_type}_{launcher_id}` | +| 5 | Group chat isolated per user | 1 Box per user within a group | `{launcher_type}_{launcher_id}_{sender_id}` | +| 6 | Teaching (cross-channel) | 1 Box per student across groups/PMs | `{sender_id}` | +| 7 | One-off execution | 1 Box per message (current behavior) | `{query_id}` | +| 8 | Multi-project development | 1 Box per conversation context | `{launcher_type}_{launcher_id}_{conversation_id}` | + +No single fixed granularity covers all scenarios. A template-based approach is needed. + +--- + +## 4. Design Overview + +Two key changes: + +1. **Unified container**: exec, skills, and MCP all share the same container per + session scope. No more separate container pools. +2. **Configurable session scope**: `session_id` is generated from a template with + pipeline variables, configurable per pipeline. + +### 4.1 Unified Container with Multiple Mounts + +A single container per session scope is created on first use. 
It has: + +- **Primary mount**: default workspace at `/workspace` (from `default_host_workspace`) +- **Skill mounts**: each pipeline-bound skill's `package_root` mounted at + `/workspace/.skills/{skill_name}/` +- **MCP servers**: run as managed processes inside the same container + +``` +Container (session_id = "group_123456") + /workspace/ ← default workspace (bind mount, rw) + /workspace/.skills/web-search/ ← skill package (bind mount, rw) + /workspace/.skills/data-analysis/ ← skill package (bind mount, rw) + [managed process: mcp-server-a] ← MCP server running inside + [managed process: mcp-server-b] ← MCP server running inside +``` + +This requires extending `BoxSpec` to support multiple mounts (see §5). + +### 4.2 Session ID Template + +A new field `box-session-id-template` in the `local-agent` pipeline runner config +controls the session scope: + +```yaml +# templates/metadata/pipeline/ai.yaml (under local-agent.config) +- name: box-session-id-template + label: + en_US: Sandbox Scope + zh_Hans: 沙箱作用域 + description: + en_US: >- + Determines how sandbox environments are shared. Use variables to + control isolation granularity. 
+ zh_Hans: >- + 决定沙箱环境的共享方式。使用变量控制隔离粒度。 + type: select + required: false + default: "{launcher_type}_{launcher_id}" + options: + - value: "{launcher_type}_{launcher_id}" + label: + en_US: Per chat (Recommended) + zh_Hans: 每个会话(推荐) + - value: "{launcher_type}_{launcher_id}_{sender_id}" + label: + en_US: Per user in chat + zh_Hans: 会话中每个用户 + - value: "{launcher_type}_{launcher_id}_{conversation_id}" + label: + en_US: Per conversation context + zh_Hans: 每个对话上下文 + - value: "{query_id}" + label: + en_US: Per message (isolated) + zh_Hans: 每条消息(完全隔离) +``` + +Available template variables (populated by PreProcessor in `query.variables`): + +| Variable | Source | Example | +|---------------------|---------------------------------|----------------------| +| `{launcher_type}` | `query.session.launcher_type` | `person` / `group` | +| `{launcher_id}` | `query.session.launcher_id` | `123456` | +| `{sender_id}` | `query.sender_id` | `789` | +| `{conversation_id}` | `conversation.uuid` | `a1b2c3d4-...` | +| `{query_id}` | `query.query_id` | `42` | + +Default `{launcher_type}_{launcher_id}` covers scenarios 1–4 out of the box. + +--- + +## 5. SDK Changes: Multi-Mount BoxSpec + +### 5.1 Model Extension + +```python +# box/models.py + +class BoxMountSpec(pydantic.BaseModel): + """A single bind mount specification.""" + host_path: str + mount_path: str + mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + +class BoxSpec(pydantic.BaseModel): + # ... existing fields ... + host_path: str | None = None # Primary mount (backward compat) + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + mount_path: str = DEFAULT_BOX_MOUNT_PATH + extra_mounts: list[BoxMountSpec] = [] # NEW: additional mounts +``` + +`extra_mounts` is additive — the existing `host_path` / `mount_path` pair remains +the primary mount for backward compatibility. 
+ +### 5.2 Backend: Apply Extra Mounts + +```python +# box/backend.py — CLISandboxBackend.start_session() + +# Primary mount (unchanged) +if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE: + args.extend(['-v', f'{spec.host_path}:{spec.mount_path}:{spec.host_path_mode.value}']) + +# Extra mounts (NEW) +for mount in spec.extra_mounts: + if mount.mode != BoxHostMountMode.NONE: + args.extend(['-v', f'{mount.host_path}:{mount.mount_path}:{mount.mode.value}']) +``` + +Same pattern for nsjail backend. + +--- + +## 6. LangBot Changes + +### 6.1 Session ID Resolution + +In `BoxService.execute_tool()`: + +```python +# Before: +spec_payload.setdefault('session_id', str(query.query_id)) + +# After: +template = (query.pipeline_config or {}).get('ai', {}) \ + .get('local-agent', {}).get('box-session-id-template', + '{launcher_type}_{launcher_id}') +variables = query.variables or {} +session_id = template.format_map(collections.defaultdict( + lambda: 'unknown', variables +)) +spec_payload.setdefault('session_id', session_id) +``` + +### 6.2 Skill Exec: Use Same Container + +Currently `native.py:_invoke_exec` creates a separate `BoxWorkspaceSession` per +skill with `host_path=package_root`. Instead: + +1. Use the **same session_id** as default exec (from the template). +2. Pass the skill's `package_root` as an **extra mount** at + `/workspace/.skills/{skill_name}/` instead of replacing `/workspace`. +3. The container already has the default workspace at `/workspace`. 
+ +```python +# native.py — _invoke_exec, skill branch (REVISED) + +# Same session_id as default exec +session_id = resolve_box_session_id(query) + +spec_payload = { + 'cmd': rewritten_command, + 'workdir': rewritten_workdir, + 'session_id': session_id, + 'extra_mounts': [{ + 'host_path': package_root, + 'mount_path': f'/workspace/.skills/{selected_skill_name}', + 'mode': 'rw', + }], +} +result = await self.ap.box_service.execute_spec_payload(spec_payload, query) +``` + +The virtual path `/workspace/.skills/{name}` no longer needs rewriting at the +command level — it maps directly to the bind mount path inside the container. + +### 6.3 MCP: Use Same Container + +MCP servers should run inside the same container as exec and skills. Changes: + +1. `BoxStdioSessionRuntime` uses the pipeline's session_id template instead of + `mcp-{server_uuid}`. +2. MCP server's working directory is a subdirectory (e.g. `/workspace/.mcp/{name}/`). +3. MCP server's dependencies are mounted or installed into that subdirectory. +4. The MCP server runs as a managed process inside the shared container. + +Since MCP servers start at LangBot boot (not per-query), the session must be +created eagerly. The container will be kept alive by the managed process +exemption in TTL reaping (`runtime.py:259`). + +**Note**: MCP sessions are pipeline-scoped (not per-launcher), so their session_id +should be a **fixed identifier per pipeline** rather than the user-facing template. +This means one shared MCP container per pipeline, with user exec sessions separate. + +Alternatively, in a future iteration, MCP managed processes could be launched +lazily into the user's container on first MCP tool call. This is more complex +but maximizes sharing. For V1, keeping MCP containers at pipeline scope is +simpler and more predictable. + +--- + +## 7. 
Mount Layout Summary + +### Default exec (no skills activated) + +``` +Container (session_id from template) + /workspace/ ← default_host_workspace (rw) +``` + +### Exec with activated skills + +``` +Container (same session_id) + /workspace/ ← default_host_workspace (rw) + /workspace/.skills/web-search/ ← skill package_root (rw) + /workspace/.skills/data-analysis/ ← skill package_root (rw) +``` + +Extra mounts are **additive** — they are added when the container is first +created (or on the first exec that references a skill). Since Docker bind +mounts are specified at container creation time, skills must be known at +creation time. + +**Resolution**: When creating a container, inject `extra_mounts` for **all +pipeline-bound skills** (from `extensions_preferences`), not just the +currently activated one. This way any skill can be activated later without +recreating the container. + +### MCP servers (V1: pipeline-scoped) + +``` +Container (session_id = "mcp-pipeline-{pipeline_uuid}") + /workspace/ ← MCP shared workspace + /workspace/.mcp/server-a/ ← MCP server A files + /workspace/.mcp/server-b/ ← MCP server B files + [managed process: server-a] + [managed process: server-b] +``` + +--- + +## 8. Data Migration + +Existing pipelines do not have `box-session-id-template`. The backend uses +`.get(..., default)` so missing keys fall back to `{launcher_type}_{launcher_id}`. +This changes behavior from per-message to per-launcher for existing pipelines. + +Recommendation: **accept the behavior change** — per-launcher is the more +intuitive default, and the old per-message behavior was rarely desired. + +--- + +## 9. 
Cloud Quota Implications + +| Scope | Typical concurrent containers | +|-----------------------------------------------|-------------------------------| +| `{query_id}` (per message) | Many, short-lived | +| `{launcher_type}_{launcher_id}` (per chat) | = active chat count | +| `{sender_id}` (per user) | = active user count | +| `{conversation_id}` (per conversation) | Between per-chat and per-msg | + +With the unified container model, each scope value maps to exactly **one** +container (instead of potentially 3+ per-message). This significantly reduces +resource usage. + +Quota enforcement point: `BoxRuntime._get_or_create_session()` in the SDK. + +--- + +## 10. Implementation Phases + +### Phase 1: Session scope + skill unification (this PR) + +1. **SDK**: Extend `BoxSpec` with `extra_mounts: list[BoxMountSpec]`. +2. **SDK**: Update Docker/nsjail backends to apply extra mounts. +3. **LangBot**: Add `box-session-id-template` to `local-agent` YAML metadata + and default pipeline config JSON. +4. **LangBot**: Update `BoxService.execute_tool()` to use template interpolation. +5. **LangBot**: Update `native.py:_invoke_exec` skill branch to use same + session_id + extra mounts instead of separate `BoxWorkspaceSession`. +6. **LangBot**: On container creation, inject extra mounts for all + pipeline-bound skills. +7. **Frontend**: No code change — `DynamicFormComponent` renders `select` fields. +8. **Tests**: Unit tests for template interpolation and multi-mount specs. + +### Phase 2: MCP unification (originally future; since shipped — see §0) + +1. Refactor `BoxStdioSessionRuntime` to use pipeline-scoped shared container. +2. MCP servers become managed processes in the shared container. +3. Support multiple concurrent managed processes per container. + +MCP unification is deferred because it requires changes to the managed process +model (currently 1 managed process per session) and has startup ordering +concerns (MCP servers start at boot, before any user query determines +a session_id). Update (2026-05-19): the preceding paragraph reflects the original plan; this phase has since shipped, including multiple managed processes per session — see §0. 
diff --git a/docs/review/box-test-coverage.md b/docs/review/box-test-coverage.md new file mode 100644 index 000000000..3fe5b52d4 --- /dev/null +++ b/docs/review/box-test-coverage.md @@ -0,0 +1,121 @@ +# Box 系统测试覆盖分析 + +> 更新日期: 2026-05-19 +> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk) + +--- + +## 1. 测试文件清单 + +### LangBot 仓库 + +| 文件 | 行数 | CI 运行 | 覆盖范围 | +|------|------|---------|---------| +| `tests/unit_tests/box/test_box_connector.py` | 106 | 是 | Connector 传输决策、WS relay URL、dispose、心跳/重连 | +| `tests/unit_tests/box/test_box_service.py` | 1224 | 是 | Service 核心逻辑(最全面) | +| `tests/unit_tests/box/test_workspace.py` | 147 | 是 | WorkspaceSession 路径重写、payload 构建 | +| `tests/unit_tests/provider/test_mcp_box_integration.py` | 707 | 是 | MCP Box 配置、路径重写、payload、shared-session/multi-process、runtime info | +| `tests/unit_tests/provider/test_localagent_sandbox_exec.py` | 444 | 是 | LocalAgent exec 流程、流式、Skill 激活 (Tool Call) | +| `tests/unit_tests/provider/test_tool_manager_native.py` | 249 | 是 | ToolManager 路由、native tool CRUD、路径穿越、6 工具暴露 | +| `tests/unit_tests/provider/test_skill_tools.py` | 582 | 是 | Skill 管理、Tool Call 激活、路径、authoring CRUD | +| `tests/unit_tests/test_skill_service.py` | 396 | 是 | HTTP service:skill CRUD、zip/GitHub install、文件浏览 | +| `tests/unit_tests/test_paths.py` | 23 | 是 | paths 工具 | +| `tests/unit_tests/test_preproc.py` | 134 | 是 | PreProcessor 注入 session 变量、bound skill 解析 | +| `tests/unit_tests/pipeline/test_chat_handler_logging.py` | 78 | 是 | Chat handler 日志相关回归 | +| `tests/integration_tests/box/test_box_integration.py` | 329 | **否** | 真实容器执行、超时、网络隔离 | +| `tests/integration_tests/box/test_box_mcp_integration.py` | 368 | **否** | Managed process、WS attach、shared-session 清理 | + +### SDK 仓库 + +| 文件 | 行数 | CI 运行 | 覆盖范围 | +|------|------|---------|---------| +| `tests/box/test_backend_selection.py` | 255 | 是 | 显式 backend / local 模式探测顺序 / 配置变更触发 reselect | +| `tests/box/test_nsjail_backend.py` | 452 | 是 | nsjail 可用性、安装版 CLI vs 容器内 CLI、session、arg 构建、资源限制 | 
+| `tests/box/test_e2b_backend.py` | 482 | 是 | E2B SDK mock、session 生命周期、extra_mounts 同步 | +| `tests/box/test_skill_store.py` | 88 | 是 | zip preview/install、基础 file CRUD | + +**总计**: 17 个测试文件, ~6,500 行测试代码; 其中 2 个集成测试(约 700 行)在 CI 中不运行。 + +> 较 2026-04-16 版增加:`test_skill_service.py`、`test_paths.py`、`test_preproc.py`、`test_chat_handler_logging.py` (LangBot),`test_backend_selection.py`、`test_e2b_backend.py`、`test_skill_store.py` (SDK)。`test_nsjail_backend.py` 增加 CLI 兼容性 case (commit `feed530`)。 + +--- + +## 2. 覆盖良好的区域 + +| 区域 | 质量 | 说明 | +|------|------|------| +| BoxRuntime session 管理 | 优秀 | session 复用、冲突检测、TTL 配置、消失 session 重建 | +| BoxService Profile 系统 | 优秀 | 4 个内置 Profile、locked/unlocked 字段、timeout clamp | +| BoxService host mount 安全 | 优秀 | allowed_mount_roots、disallowed_roots、shared host root | +| BoxService workspace quota | 优秀 | 前置/后置配额检查、超额清理 | +| BoxService 输出截断 | 优秀 | 短/精确边界/长输出、独立 stderr | +| BoxService 可观测性 | 优秀 | 状态报告、error ring buffer、buffer 上限 | +| BoxService session 模板 | 良好 | `resolve_box_session_id` + `build_skill_extra_mounts` 在 service / native / mcp 三处都有覆盖 | +| RPC client/server 协议 | 优秀 | execute/get_sessions/delete/create/conflict error | +| BoxRuntimeConnector | 良好 | local/remote 模式、Docker 平台、relay URL、心跳与重连回调 | +| BoxWorkspaceSession | 良好 | payload 构建、managed process 路径重写、stage host file | +| BoxHostMountMode.NONE | 良好 | 枚举校验、workdir 约束 | +| NsjailBackend | 良好 | 可用性、安装版 vs 容器内、session 生命周期、arg 构建、资源限制 | +| E2BBackend | 良好 | mock SDK、session/extra_mounts 同步 | +| Backend selection | 良好 | 显式 backend 优先级、local 探测顺序、配置变更触发 reselect | +| MCP Box 集成 | 良好 | config model、路径重写、payload、shared-session 多 process | +| Native tool loader | 良好 | 6 工具(exec/read/write/edit/glob/grep)、路径穿越拦截 | +| LocalAgent exec 流程 | 良好 | 完整 tool call 循环、流式、system prompt 注入、Tool Call 激活 | +| Skill 系统 | 良好 | 加载、Tool Call 激活、marker、路径解析、authoring CRUD、HTTP service | + +--- + +## 3. 
覆盖缺失的区域 + +### 3.1 零测试 / 严重不足 + +| 区域 | 源文件 | 影响 | +|------|--------|------| +| **`security.py`** | SDK `box/security.py` (52 行) | `validate_sandbox_security()` 无任何测试。阻止 `/etc`/`/proc`/Docker socket 等危险挂载的安全函数从未被验证 | +| **`policy.py`** | `pkg/box/policy.py` (98 行) | 三层安全策略无测试(也是死代码) | +| **`skill_store.py` 边缘场景** | SDK `box/skill_store.py` (647 行) vs 测试 88 行 | GitHub 安装路径、`source_subdir` / `target_suffix` 组合、损坏 zip、文件冲突等场景未覆盖 | + +### 3.2 未测试的关键路径 + +| 区域 | 说明 | +|------|------| +| **Session TTL 过期** | 测试配置了 `session_ttl_sec` 但从未推进时间验证过期清理 | +| **并发 session 访问** | 无并发 exec / 并发创建 / race condition 测试 | +| **Container backend (Docker)** | 仅通过集成测试覆盖(CI 不运行),单元测试全用 FakeBackend | +| **E2B 真实 sandbox** | 单测全是 mock,未对接真实 E2B API | +| **BoxRuntime shutdown()** | 在 test cleanup 中调用但未验证行为 | +| **BoxServerHandler 错误路径** | 畸形请求、未知 action 类型 | +| **WS relay** | 仅在集成测试中覆盖(CI 不运行) | +| **NsjailBackend managed process** | 完全未测试 | +| **MCP stdio 完整生命周期** | 依赖安装 → 进程启动 → 健康检查 → 多 process 并发 → 重试 | +| **BoxService start/stop_managed_process** | 单 process 流转有单测,多 process 互不阻塞主要靠集成测试 | +| **重连指数退避** | connector 单测覆盖回调接线,未实际跑完整重连周期 | + +### 3.3 边缘情况缺失 + +| 区域 | 说明 | +|------|------| +| BoxSpec 校验 | 无效 session_id 格式、超长命令、env 特殊字符 | +| BoxSpec.extra_mounts | 重复 mount_path、与 host_path 冲突、绝对 vs 相对路径 | +| BoxExecutionResult | 仅 COMPLETED 和 TIMED_OUT,无 ERROR 状态测试 | +| 多后端 fallback | local 模式探测顺序仅靠 mock,无真实 Docker 不可用 → nsjail 真机 fallback 测试 | +| Profile YAML 加载 | 测试用硬编码字符串,未从真实 config.yaml 加载 | +| INIT 配置变更触发 backend 重建 | 单测仅在初始化场景验证 | + +--- + +## 4. 
集成测试 vs CI 的差距 + +CI 仅运行 `tests/unit_tests/`,以下场景**从未在自动化中验证**: + +- 真实容器的创建/执行/销毁 +- 容器网络隔离(`--network none`) +- 容器资源限制生效(cpus/memory/pids_limit) +- Managed process 的 WS 双向 I/O +- 多 process 同 session 并发 I/O +- 孤儿容器清理 +- Session 删除清理容器 +- 进程退出检测 +- E2B 真实 sandbox 行为 + +**建议**: 在 CI 中加一个可选的 Docker-in-Docker 集成测试 stage,至少覆盖核心执行路径(exec / MCP attach / session 销毁)。 diff --git a/docs/review/box-tob-analysis.md b/docs/review/box-tob-analysis.md new file mode 100644 index 000000000..c41f45ae3 --- /dev/null +++ b/docs/review/box-tob-analysis.md @@ -0,0 +1,166 @@ +# Box 系统 toB 商业化分析 + +> 更新日期: 2026-05-19 +> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk) + +--- + +## 1. 现有优势 + +| 能力 | toB 价值 | 代码位置 | +|------|---------|---------| +| **沙箱隔离执行** | 企业安全运行不受信代码的基础能力 | SDK `box/backend.py` | +| **多后端支持** | 适配不同企业容器基础设施 (Podman/Docker/nsjail/E2B) | SDK `box/runtime.py` `_select_backend()` | +| **E2B 云沙箱** | SaaS / 无 Docker 部署的兜底执行环境 | SDK `box/e2b_backend.py` | +| **连接自愈** | 心跳 + 自动重连,单点 Box runtime 故障可恢复 | `pkg/box/connector.py` `_heartbeat_loop`, `pkg/box/service.py` `_reconnect_loop` | +| **Profile + locked 字段** | 运维锁定安全边界,LLM/用户无法绕过 | `pkg/box/service.py`, SDK `box/models.py` | +| **资源限制** | CPU/内存/PID 数限制防止资源滥用 | SDK `backend.py` `--cpus/--memory/--pids-limit` | +| **Workspace quota** | 磁盘用量控制 | `pkg/box/service.py` `_enforce_workspace_quota` | +| **静默降级** | Box 不可用不影响其他功能,降低部署门槛 | `pkg/box/service.py:78` `_available=False` | +| **孤儿容器清理** | 防止泄漏的容器持续占用资源 | SDK `backend.py` `cleanup_orphaned_containers` | +| **网络隔离** | `--network none` 防止数据外泄 | SDK `backend.py` start_session | +| **只读根文件系统** | `--read-only` 防止容器被持久篡改 | SDK `backend.py` start_session | +| **Host path 白名单** | `allowed_host_mount_roots` 限制可挂载目录 | `pkg/box/service.py` `_validate_host_mount` | + +--- + +## 2. 
toB 差距分析 + +### 2.1 安全与合规 + +| 维度 | 现状 | toB 要求 | 优先级 | +|------|------|---------|--------| +| **WS relay 认证** | 无认证,任何人可 attach | 至少 token 认证 | **P0** | +| **安全策略** | policy.py 是死代码,实际无细粒度控制 | 工具级 allow/deny、沙箱模式控制 | **P0** | +| **审计日志** | 仅内存中 50 条 `_recent_errors` | 持久化审计:谁何时执行了什么、结果如何 | **P0** | +| **Host path 校验** | 黑名单策略,`/` 未拦截 | 白名单策略,默认拒绝 | **P1** | +| **数据驻留** | 无控制 | GDPR / 等保要求的数据隔离 | **P2** | + +### 2.2 多租户 + +| 维度 | 现状 | toB 要求 | 优先级 | +|------|------|---------|--------| +| **租户隔离** | 无租户概念 | BoxSpec/Profile 绑定 tenant_id | **P0** | +| **RBAC** | 仅 token 认证 | admin/operator/viewer 角色权限 | **P0** | +| **资源配额** | 单一 workspace quota | 每租户 CPU 时间/内存/并发/执行次数配额 | **P1** | +| **Session 隔离** | 所有 session 共享 dict | 按租户分区,互不可见 | **P1** | + +### 2.3 可靠性 + +| 维度 | 现状 | toB 要求 | 优先级 | +|------|------|---------|--------| +| **连接恢复** | 已实现:20s 心跳 + `_reconnect_loop` 指数退避 | 已满足基本要求 | 已有 | +| **Session 清理** | 机会性(仅新建时触发) | 定时清理 + 独立 reaper | **P1** | +| **水平扩展** | 单 Box Runtime 实例 | 多实例负载均衡(按 tenant 路由) | **P1** | +| **优雅降级** | 已有(_available=False) | 已满足基本要求 | 已有 | +| **Backend 自愈** | 已实现:`get_status` 时若 backend 不可用会重新选择 | 已满足基本要求 | 已有 | + +### 2.4 可观测性 + +| 维度 | 现状 | toB 要求 | 优先级 | +|------|------|---------|--------| +| **监控指标** | 无 Prometheus metrics | session 数/执行延迟/资源用量/错误率 | **P1** | +| **结构化日志** | Python logging, 无结构化 | JSON 格式日志,含 trace_id/tenant_id | **P1** | +| **前端面板** | 监控页接入 `/api/v1/box/status`(backend 名 + 活跃 session 数);`sessions` / `errors` 仍未接入 | 完整状态面板 + 历史错误/审计列表 | **P2** | + +--- + +## 3. 
SaaS 部署架构建议 + +### 3.1 方案 A: 共享 Box Runtime Pool (快速上线) + +``` +LangBot Instance ──> Box Runtime (共享) + ├─ tenant_id 标签隔离 + ├─ Redis 配额计数器 + └─ Container labels: langbot.tenant_id=xxx +``` + +- **优点**: 改动最小,加 tenant_id 到 BoxSpec/labels 即可 +- **缺点**: 容器引擎共享,安全隔离弱 + +### 3.2 方案 B: 每租户 K8s Namespace + gVisor (推荐中期) + +``` +LangBot ──> K8s API + ├─ namespace: tenant-xxx + │ ├─ RuntimeClass: gVisor (runsc) + │ ├─ ResourceQuota + │ └─ NetworkPolicy + └─ namespace: tenant-yyy + └─ ... +``` + +- **优点**: 强隔离(namespace + gVisor),原生 K8s 配额 +- **缺点**: 需要重写 backend 为 K8s Job,部署复杂度高 + +### 3.3 方案 C: K8s Job 直接编排 (长期) + +``` +LangBot ──> K8s Job per execution + ├─ 每次执行创建 Job + ├─ Pod Security Standards + ├─ 自动调度和资源分配 + └─ Job TTL Controller 自动清理 +``` + +- **优点**: 最强隔离,天然水平扩展 +- **缺点**: 冷启动延迟,架构重写 + +**推荐演进路径**: A → B → C + +--- + +## 4. 配额体系建议 + +### 三层配额 + +| 层 | 实现 | 作用 | +|----|------|------| +| **内核层** | Docker `--cpus`/`--memory`/`--storage-opt` | 硬性资源上限,不可绕过 | +| **应用层** | Redis 原子计数器 | 并发 session 数/执行次数/CPU 时间预算 | +| **计费层** | 月度聚合 | 按租户计费(session-hours/execution-count) | + +### Profile 与套餐映射 + +| 套餐 | Profile | locked 字段 | 配额 | +|------|---------|------------|------| +| Free | `offline_readonly` | network, host_path_mode, rootfs | 10 exec/天, 0.5 CPU, 256MB | +| Pro | `default` | (无) | 100 exec/天, 1 CPU, 512MB | +| Enterprise | `network_extended` | (按需) | 无限, 2 CPU, 1GB, 自定义镜像 | + +### TOCTOU 配额修复 + +当前 `_enforce_workspace_quota` 的 TOCTOU 问题可通过两种方式解决: + +1. **预留式配额** (应用层): Redis `INCRBY` 预扣额度 → 执行 → 成功则扣减,失败则回滚 +2. **内核级限制** (Docker): `--storage-opt size=500m` 直接限制容器可写层大小 + +--- + +## 5. 
优先实施路线 + +### Phase 1 (2-4 周): 安全基线 + +- [ ] WS relay 加 token 认证 +- [ ] 接入或删除 policy.py +- [x] ~~Box 加重连和心跳~~(已完成,见 [box-issues.md 已解决](./box-issues.md)) +- [ ] 审计日志持久化(至少写文件/数据库) +- [ ] `security.py` 加 `/` 拦截,考虑白名单 +- [ ] INIT 与 backend 初始化顺序整理(避免 backend 在配置到达前实例化) + +### Phase 2 (4-8 周): 多租户基础 + +- [ ] BoxSpec 加 `tenant_id` 字段 +- [ ] 容器 labels 加 tenant 标识 +- [ ] Redis 配额计数器(并发/执行次数/时间) +- [ ] RBAC 基础框架 +- [ ] 定时 session reaper + +### Phase 3 (8-16 周): 生产就绪 + +- [ ] Prometheus metrics exporter +- [ ] 前端 Box 状态面板 +- [ ] K8s backend 支持 (方案 B) +- [ ] 结构化日志 (JSON, trace_id) +- [ ] 水平扩展支持 diff --git a/docs/review/box-vs-plugin-runtime.md b/docs/review/box-vs-plugin-runtime.md new file mode 100644 index 000000000..622a425f5 --- /dev/null +++ b/docs/review/box-vs-plugin-runtime.md @@ -0,0 +1,221 @@ +# Box Runtime vs Plugin Runtime: 连接架构对比 + +> 更新日期: 2026-05-19 +> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk) + +--- + +## 1. 总体差异 + +| 维度 | Plugin Runtime | Box Runtime | +|------|---------------|-------------| +| **继承关系** | `PluginRuntimeConnector(ManagedRuntimeConnector)` | `BoxRuntimeConnector`(独立类) | +| **传输分支** | 3 条 (Docker/WS, Win32/subprocess+WS, Unix/stdio) | 3 条 (本地 stdio, Win32/subprocess+WS, 远程 WS) | +| **心跳** | 20s ping loop | 20s ping loop(`_heartbeat_loop`) | +| **重连** | WS 模式: sleep 3s → re-initialize | 由 BoxService `_reconnect_loop` 处理,指数退避 | +| **Handler 类型** | `RuntimeConnectionHandler` (1132 行, 25+ action) | 基础 `Handler` + `BoxServerHandler`(SDK 端 25 action) | +| **Client 抽象** | Handler 即 API | 独立 `ActionRPCBoxClient` 封装 Handler | +| **启用/禁用** | `is_enable_plugin` 开关 | 无开关(可用/不可用由初始化结果决定) | +| **初始化失败** | 异常上抛 | 静默降级 `_available=False` | +| **Shutdown** | 直接杀进程 | RPC SHUTDOWN → 清理容器 → 再杀进程 | + +--- + +## 2. 
传输决策 + +### Plugin: 3-路决策 + +```python +# pkg/plugin/connector.py:106-165 +if get_platform() == 'docker' or use_websocket_to_connect_plugin_runtime(): + # Docker/WS → ws://langbot_plugin_runtime:5400/control/ws +elif get_platform() == 'win32': + # Windows → 起子进程(无 pipe) + ws://localhost:5400/control/ws +else: + # Unix/Mac → StdioClientController(python -m langbot_plugin.cli rt -s) +``` + +### Box: 3-路决策 + +```python +# pkg/box/connector.py +if self._uses_websocket(): + if platform.get_platform() == 'win32' and not self.configured_runtime_url: + await self._start_subprocess_then_ws() # subprocess + ws://localhost:5410/rpc/ws + else: + await self._connect_remote_ws() # ws://{host}:5410/rpc/ws +else: + await self._start_local_stdio() # StdioClientController +``` + +> 历史:2026-04-16 版本本文档曾把 Box 描述为 2 路决策(缺 Windows 分支)。现已对齐 Plugin 的 3 路设计。 + +### 决策矩阵 + +| 环境 | Plugin | Box | +|------|--------|-----| +| Docker | WS → `:5400` | WS → `:5410/rpc/ws` | +| `--standalone-box` | N/A | WS → `localhost:5410/rpc/ws` | +| Windows 非 Docker | subprocess + WS (`:5400`) | subprocess + WS (`localhost:5410/rpc/ws`) | +| Unix/Mac 非 Docker | stdio | stdio | +| 手动配置 URL | 通过配置项 | WS → 用户配置的 URL | + +--- + +## 3. 
连接建立 + +### 同步模式差异 + +**Plugin**: `new_connection_callback` 内直接 ping + await handler_task,`initialize()` 通过 `create_task()` 异步启动,不阻塞等待连接。 + +**Box**: 使用 `asyncio.Event` + `wait_for(timeout=30s)` 模式,`initialize()` 同步等待连接成功或超时。 + +### Box stdio 路径 + +``` +connector._start_local_stdio() + ├─ connected = asyncio.Event() + ├─ ctrl = StdioClientController(python, ['-m', 'langbot_plugin.cli.__init__', 'box', '-s', '--ws-control-port', N]) + ├─ _ctrl_task = create_task(ctrl.run(callback)) + │ callback: + │ handler = Handler(connection) ← 基础 Handler, 无 disconnect_callback + │ client.set_handler(handler) + │ _handler_task = create_task(handler.run()) + │ call_action(PING, {}) ← 握手, timeout=15s + │ connected.set() ← 通知外层 + │ await _handler_task ← 阻塞直到断开 + └─ await wait_for(connected.wait(), 30s) ← 同步等待 +``` + +### Plugin stdio 路径 + +``` +connector.initialize() + ├─ ctrl = StdioClientController(python, ['-m', 'langbot_plugin.cli', 'rt', '-s']) + ├─ task = ctrl.run(callback) + │ callback: + │ disconnect_callback: + │ [WS] → runtime_disconnect_callback → 重连 + │ [stdio] → 仅日志, 不重连 + │ handler = RuntimeConnectionHandler(conn, disconnect_cb, ap) + │ create_task(handler.run()) + │ handler.ping() ← 握手, timeout=10s + │ await handler_task ← 阻塞直到断开 + ├─ create_task(heartbeat_loop()) ← 20s ping loop + └─ create_task(task) ← 不等待连接 +``` + +--- + +## 4. 心跳与重连 + +### 心跳 + +| 维度 | Plugin | Box | +|------|--------|-----| +| 有心跳? 
| 是 | 是(`connector.py` `_heartbeat_loop`) | +| 间隔 | 20s | 20s | +| 失败处理 | 仅 DEBUG 日志,不触发重连 | 仅 DEBUG 日志,依赖 connection close 触发重连 | +| 生命周期 | 整个应用生命周期 | 连接建立后启动;`dispose()` 时 cancel | + +### 重连 + +| 维度 | Plugin | Box | +|------|--------|-----| +| Docker/WS 断开 | `runtime_disconnect_callback` → sleep 3s → re-initialize | `runtime_disconnect_callback` → `BoxService._reconnect_loop()`(指数退避) | +| WS 连接失败 | 同上 | 同上;初次失败时 `_available=False`,重连成功后恢复 | +| stdio 断开 | 仅日志,不重连 | 接同样回调;stdio 重连需重新 fork 子进程 | +| 重连退避 | 固定 3s,无 backoff | 指数退避 | + +> 历史:2026-04-16 版本本文档曾把心跳与重连标记为 Box 缺失。这两项已在 commit `2dfd9d5d` / `c6882cf` / `5029d9c` 等修复(详见 [box-issues.md 已解决](./box-issues.md))。 + +--- + +## 5. 共享 IO 层 + +两者复用同一套 SDK IO 基础设施: + +``` +Handler ← ABC (runtime/io/handler.py) + ├── RuntimeConnectionHandler (Plugin 用, LangBot 侧) + ├── ControlConnectionHandler (Plugin 用, SDK 侧) + ├── BoxServerHandler (Box 用, SDK 侧) + └── 匿名 Handler 实例 (Box 用, LangBot 侧) + +Connection ← ABC + ├── StdioConnection (stdio: 16KB chunks, 应用层分帧协议) + └── WebSocketConnection (WS: 64KB chunks, 原生 WS 分帧) + +Controller ← ABC + ├── StdioClientController (fork 子进程, pipe stdin/stdout) + ├── StdioServerController (接管当前进程 stdin/stdout) + ├── WebSocketClientController (连接 WS 服务端) + └── WebSocketServerController (监听 WS 端口) +``` + +共享的核心机制: +- `call_action()` / `call_action_generator()` — RPC 调用/流式调用 +- `ActionRequest` / `ActionResponse` — 请求/响应协议 +- `seq_id` 关联 — 并发请求复用单连接 +- `CommonAction.PING` — 两者都用于初始握手 +- 文件传输 (`send_file`) — Plugin 用,Box 不用 + +--- + +## 6. 端口方案 + +| 服务 | Plugin | Box | +|------|--------|-----| +| Action RPC (stdio) | stdin/stdout | stdin/stdout | +| Action RPC (WS) | `:5400` | `:5410/rpc/ws` | +| 辅助服务 | debug WS `:5401` | managed process WS relay `:5410/v1/sessions/{id}/managed-process/ws` | + +**Box 特点**: 单端口 aiohttp 服务(默认 5410),通过路径区分 Action RPC 和 managed process relay。即使在 stdio 模式,也在 `:5410` 启动 aiohttp 用于 managed process attach。Plugin 在 stdio 模式不开额外端口。 + +--- + +## 7. 
销毁对比 + +### Plugin + +```python +dispose(): + if stdio: ctrl.process.terminate() + _dispose_subprocess() # Windows 子进程 + heartbeat_task.cancel() +``` + +### Box + +```python +connector.dispose(): + _handler_task.cancel() + _ctrl_task.cancel() + _subprocess.terminate() + +service.dispose(): + connector.dispose() + loop.create_task(client.shutdown()) # RPC SHUTDOWN → 清理所有容器 +``` + +Box 的 RPC SHUTDOWN 确保容器被正确停止,不会成为孤儿。Plugin 直接杀进程。 + +--- + +## 8. 改进建议 + +### P0 + +1. **两者都加 WS 认证**: 至少 token 认证(INIT 时下发,连接时校验) + +### P1 + +2. **考虑 Box 继承 ManagedRuntimeConnector**: 复用 `_start_runtime_subprocess` / `_wait_until_ready` / `_dispose_subprocess`,减少重复代码 +3. **Plugin 重连加退避**: 固定 3s 无 backoff 可能造成日志洪水,建议向 Box 的指数退避看齐 +4. **统一连接管理模式**: Event-based (Box) vs direct-await (Plugin),考虑收敛为一种 + +### 已完成(自上一轮) + +- ~~Box 加重连~~(commit `2dfd9d5d`) +- ~~Box 加心跳~~(20s loop 与 Plugin 一致) +- ~~Box 加 Windows 支持~~(commit `120817a` / `fafb7a4`) diff --git a/pyproject.toml b/pyproject.toml index 8c5fe6512..bbac52046 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langbot" -version = "4.9.7" +version = "4.10.0-beta.1" description = "Production-grade platform for building agentic IM bots" readme = "README.md" license-files = ["LICENSE"] @@ -70,7 +70,7 @@ dependencies = [ "chromadb>=1.0.0,<2.0.0", "qdrant-client (>=1.15.1,<2.0.0)", "pyseekdb==1.1.0.post3", - "langbot-plugin==0.3.11", + "langbot-plugin==0.4.0b1", "asyncpg>=0.30.0", "line-bot-sdk>=3.19.0", "matrix-nio>=0.25.2", @@ -223,4 +223,3 @@ skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. 
line-ending = "auto" - diff --git a/src/langbot/__init__.py b/src/langbot/__init__.py index 9fa15e197..28a31d07e 100644 --- a/src/langbot/__init__.py +++ b/src/langbot/__init__.py @@ -1,3 +1,3 @@ """LangBot - Production-grade platform for building agentic IM bots""" -__version__ = '4.9.7' +__version__ = '4.10.0-beta.1' diff --git a/src/langbot/__main__.py b/src/langbot/__main__.py index b94500e75..485598296 100644 --- a/src/langbot/__main__.py +++ b/src/langbot/__main__.py @@ -5,6 +5,8 @@ import sys import os +from langbot.pkg.utils import paths + # ASCII art banner asciiart = r""" _ ___ _ @@ -27,6 +29,12 @@ async def main_entry(loop: asyncio.AbstractEventLoop): help='Use standalone plugin runtime / 使用独立插件运行时', default=False, ) + parser.add_argument( + '--standalone-box', + action='store_true', + help='Use standalone box runtime / 使用独立 Box 运行时', + default=False, + ) parser.add_argument('--debug', action='store_true', help='Debug mode / 调试模式', default=False) args = parser.parse_args() @@ -35,6 +43,11 @@ async def main_entry(loop: asyncio.AbstractEventLoop): platform.standalone_runtime = True + if args.standalone_box: + from langbot.pkg.utils import platform + + platform.standalone_box = True + if args.debug: from langbot.pkg.utils import constants @@ -87,7 +100,7 @@ def main(): # Set up the working directory # When installed as a package, we need to handle the working directory differently # We'll create data directory in current working directory if not exists - os.makedirs('data', exist_ok=True) + os.makedirs(paths.get_data_root(), exist_ok=True) loop = asyncio.new_event_loop() diff --git a/src/langbot/pkg/api/http/controller/groups/box.py b/src/langbot/pkg/api/http/controller/groups/box.py new file mode 100644 index 000000000..d39ced932 --- /dev/null +++ b/src/langbot/pkg/api/http/controller/groups/box.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from .. 
import group + + +@group.group_class('box', '/api/v1/box') +class BoxRouterGroup(group.RouterGroup): + async def initialize(self) -> None: + @self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + status = await self.ap.box_service.get_status() + return self.success(data=status) + + @self.route('/sessions', methods=['GET'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + sessions = await self.ap.box_service.get_sessions() + return self.success(data=sessions) + + @self.route('/errors', methods=['GET'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + errors = self.ap.box_service.get_recent_errors() + return self.success(data=errors) diff --git a/src/langbot/pkg/api/http/controller/groups/extensions.py b/src/langbot/pkg/api/http/controller/groups/extensions.py new file mode 100644 index 000000000..ac8463c90 --- /dev/null +++ b/src/langbot/pkg/api/http/controller/groups/extensions.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import asyncio +import quart + +from .. 
import group + + +@group.group_class('extensions', '/api/v1/extensions') +class ExtensionsRouterGroup(group.RouterGroup): + """Unified API for installed extensions (plugins, MCP servers, skills).""" + + async def initialize(self) -> None: + @self.route('', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def _() -> quart.Response: + plugins, mcp_servers, skills = await asyncio.gather( + self.ap.plugin_connector.list_plugins(), + self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True), + self.ap.skill_service.list_skills(), + return_exceptions=True, + ) + + def _sort_key(item: dict) -> str: + if item['type'] == 'plugin': + return ( + item['plugin'] + .get('manifest', {}) + .get('manifest', {}) + .get('metadata', {}) + .get('name', '') + .lower() + ) + if item['type'] == 'mcp': + return (item['server'].get('name') or '').lower() + if item['type'] == 'skill': + return (item['skill'].get('display_name') or item['skill'].get('name') or '').lower() + return '' + + extensions: list[dict] = [] + if isinstance(plugins, list): + for plugin in plugins: + extensions.append({'type': 'plugin', 'plugin': plugin}) + if isinstance(mcp_servers, list): + for server in mcp_servers: + extensions.append({'type': 'mcp', 'server': server}) + if isinstance(skills, list): + for skill in skills: + extensions.append({'type': 'skill', 'skill': skill}) + + extensions.sort(key=_sort_key) + + return self.success(data={'extensions': extensions}) diff --git a/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py b/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py index e7fb61188..c6b2a1b43 100644 --- a/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py +++ b/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py @@ -73,15 +73,21 @@ async def _(pipeline_uuid: str) -> str: plugins = await self.ap.plugin_connector.list_plugins(component_kinds=pipeline_component_kinds) mcp_servers = await 
self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True) + # Get available skills + available_skills = await self.ap.skill_service.list_skills() + extensions_prefs = pipeline.get('extensions_preferences', {}) return self.success( data={ 'enable_all_plugins': extensions_prefs.get('enable_all_plugins', True), 'enable_all_mcp_servers': extensions_prefs.get('enable_all_mcp_servers', True), + 'enable_all_skills': extensions_prefs.get('enable_all_skills', True), 'bound_plugins': extensions_prefs.get('plugins', []), 'available_plugins': plugins, 'bound_mcp_servers': extensions_prefs.get('mcp_servers', []), 'available_mcp_servers': mcp_servers, + 'bound_skills': extensions_prefs.get('skills', []), + 'available_skills': available_skills, } ) elif quart.request.method == 'PUT': @@ -89,11 +95,19 @@ async def _(pipeline_uuid: str) -> str: json_data = await quart.request.json enable_all_plugins = json_data.get('enable_all_plugins', True) enable_all_mcp_servers = json_data.get('enable_all_mcp_servers', True) + enable_all_skills = json_data.get('enable_all_skills', True) bound_plugins = json_data.get('bound_plugins', []) bound_mcp_servers = json_data.get('bound_mcp_servers', []) + bound_skills = json_data.get('bound_skills', []) await self.ap.pipeline_service.update_pipeline_extensions( - pipeline_uuid, bound_plugins, bound_mcp_servers, enable_all_plugins, enable_all_mcp_servers + pipeline_uuid, + bound_plugins, + bound_mcp_servers, + enable_all_plugins, + enable_all_mcp_servers, + bound_skills=bound_skills, + enable_all_skills=enable_all_skills, ) return self.success() diff --git a/src/langbot/pkg/api/http/controller/groups/pipelines/websocket_chat.py b/src/langbot/pkg/api/http/controller/groups/pipelines/websocket_chat.py index c85ecc779..ebe46b8fe 100644 --- a/src/langbot/pkg/api/http/controller/groups/pipelines/websocket_chat.py +++ b/src/langbot/pkg/api/http/controller/groups/pipelines/websocket_chat.py @@ -43,8 +43,12 @@ async def websocket_connect(pipeline_uuid: 
str): await quart.websocket.send(json.dumps({'type': 'error', 'message': 'WebSocket adapter not found'})) return - # Find the owning bot for this pipeline (e.g. a web_page_bot) - owner_bot = self._find_owner_bot(pipeline_uuid) + # Dashboard pipeline-debug sessions must always run under the + # built-in websocket_proxy_bot identity. We deliberately do NOT + # resolve a web_page_bot owner here — even if one is bound to + # the same pipeline, debug requests must not be attributed to + # it. The embed widget path (`/api/v1/embed//ws/connect`) + # is the one that carries the page-bot identity. # 注册连接 connection = await ws_connection_manager.add_connection( @@ -73,7 +77,7 @@ async def websocket_connect(pipeline_uuid: str): ) # 创建接收和发送任务 - receive_task = asyncio.create_task(self._handle_receive(connection, websocket_adapter, owner_bot)) + receive_task = asyncio.create_task(self._handle_receive(connection, websocket_adapter)) send_task = asyncio.create_task(self._handle_send(connection)) # 等待任务完成 @@ -181,14 +185,7 @@ async def broadcast_message(pipeline_uuid: str) -> str: except Exception as e: return self.http_status(500, -1, f'Internal server error: {str(e)}') - def _find_owner_bot(self, pipeline_uuid: str): - """Find a user-created bot (e.g. 
web_page_bot) that owns this pipeline.""" - for bot in self.ap.platform_mgr.bots: - if bot.bot_entity.adapter == 'web_page_bot' and bot.bot_entity.use_pipeline_uuid == pipeline_uuid: - return bot - return None - - async def _handle_receive(self, connection, websocket_adapter, owner_bot=None): + async def _handle_receive(self, connection, websocket_adapter): """处理接收消息的任务""" try: while connection.is_active: @@ -213,7 +210,10 @@ async def _handle_receive(self, connection, websocket_adapter, owner_bot=None): logger.debug(f'收到消息: {data} from {connection.connection_id}') # 处理消息(不等待响应,响应会通过broadcast异步发送) - await websocket_adapter.handle_websocket_message(connection, data, owner_bot=owner_bot) + # owner_bot is intentionally NOT passed: the dashboard + # debug WebSocket must always run under the proxy bot, + # never under a coincidentally-bound web_page_bot. + await websocket_adapter.handle_websocket_message(connection, data) elif message_type == 'disconnect': # 客户端主动断开 diff --git a/src/langbot/pkg/api/http/controller/groups/plugins.py b/src/langbot/pkg/api/http/controller/groups/plugins.py index 3de3e678b..05a8a2714 100644 --- a/src/langbot/pkg/api/http/controller/groups/plugins.py +++ b/src/langbot/pkg/api/http/controller/groups/plugins.py @@ -1,11 +1,15 @@ from __future__ import annotations import base64 +import io import quart import re import httpx import uuid import os +import zipfile +import yaml +from urllib.parse import urlparse import posixpath import sqlalchemy @@ -53,6 +57,97 @@ def _get_request_origin() -> str: @group.group_class('plugins', '/api/v1/plugins') class PluginsRouterGroup(group.RouterGroup): + @staticmethod + def _normalize_archive_path(path: str) -> str: + normalized = str(path or '').replace('\\', '/').strip('/') + return posixpath.normpath(normalized) if normalized else '' + + @classmethod + def _component_source_path(cls, entry) -> str: + if isinstance(entry, dict): + return cls._normalize_archive_path(entry.get('path') or '') + return 
cls._normalize_archive_path(str(entry or '')) + + @classmethod + def _count_component_configs(cls, component_config, archive_names: list[str]) -> int: + normalized_names = [cls._normalize_archive_path(name) for name in archive_names] + component_files: set[str] = set() + + if isinstance(component_config, list): + return len(component_config) + if not isinstance(component_config, dict): + return 1 if component_config else 0 + + for entry in component_config.get('fromFiles') or []: + source_path = cls._component_source_path(entry) + if source_path and source_path in normalized_names: + component_files.add(source_path) + + for entry in component_config.get('fromDirs') or []: + source_dir = cls._component_source_path(entry).rstrip('/') + if not source_dir: + continue + prefix = f'{source_dir}/' + for archive_name in normalized_names: + if not archive_name.startswith(prefix): + continue + if archive_name.lower().endswith(('.yaml', '.yml')): + component_files.add(archive_name) + + if component_files: + return len(component_files) + + return 1 if any(key in component_config for key in ('path', 'name', 'kind')) else 0 + + @classmethod + def _count_plugin_components(cls, components, archive_names: list[str]) -> dict[str, int]: + if not isinstance(components, dict): + return {} + + component_counts: dict[str, int] = {} + for kind, component_config in components.items(): + count = cls._count_component_configs(component_config, archive_names) + if count > 0: + component_counts[str(kind)] = count + return component_counts + + @staticmethod + def _parse_github_repo_url(repo_url: str) -> dict | None: + raw_url = str(repo_url or '').strip() + if not raw_url: + return None + + if not re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*://', raw_url): + raw_url = f'https://{raw_url}' + + parsed = urlparse(raw_url) + if parsed.netloc.lower() not in ('github.com', 'www.github.com'): + return None + + parts = [part for part in parsed.path.strip('/').split('/') if part] + if len(parts) < 2: + return 
None + + owner = parts[0] + repo = parts[1] + if repo.endswith('.git'): + repo = repo[:-4] + if not owner or not repo: + return None + + ref = '' + subdir = '' + if len(parts) >= 4 and parts[2] in ('tree', 'blob'): + ref = parts[3] + subdir = '/'.join(parts[4:]).strip('/') + + return { + 'owner': owner, + 'repo': repo, + 'ref': ref, + 'subdir': subdir, + } + async def _check_extensions_limit(self) -> str | None: """Check if extensions limit is reached. Returns error response if limit exceeded, None otherwise.""" limitation = self.ap.instance_config.data.get('system', {}).get('limitation', {}) @@ -254,17 +349,37 @@ async def _() -> str: data = await quart.request.json repo_url = data.get('repo_url', '') - # Parse GitHub repository URL to extract owner and repo - # Supports: https://github.com/owner/repo or github.com/owner/repo - pattern = r'github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/.*)?$' - match = re.search(pattern, repo_url) - - if not match: + parsed_repo = self._parse_github_repo_url(repo_url) + if not parsed_repo: return self.http_status(400, -1, 'Invalid GitHub repository URL') - owner, repo = match.groups() + owner = parsed_repo['owner'] + repo = parsed_repo['repo'] + requested_ref = parsed_repo['ref'] + requested_subdir = parsed_repo['subdir'] try: + if requested_ref: + return self.success( + data={ + 'releases': [ + { + 'id': 0, + 'tag_name': requested_ref, + 'name': requested_ref, + 'published_at': '', + 'prerelease': False, + 'draft': False, + 'source_type': 'branch', + 'archive_url': f'https://api.github.com/repos/{owner}/{repo}/zipball/{requested_ref}', + } + ], + 'owner': owner, + 'repo': repo, + 'source_subdir': requested_subdir, + } + ) + # Fetch releases from GitHub API url = f'https://api.github.com/repos/{owner}/{repo}/releases' async with httpx.AsyncClient( @@ -290,7 +405,14 @@ async def _() -> str: } ) - return self.success(data={'releases': formatted_releases, 'owner': owner, 'repo': repo}) + return self.success( + data={ + 'releases': 
formatted_releases, + 'owner': owner, + 'repo': repo, + 'source_subdir': requested_subdir, + } + ) except httpx.RequestError as e: return self.http_status(500, -1, f'Failed to fetch releases: {str(e)}') @@ -445,6 +567,62 @@ async def _() -> str: return self.success(data={'task_id': wrapper.id}) + @self.route('/install/local/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def _() -> str: + file = (await quart.request.files).get('file') + if file is None: + return self.http_status(400, -1, 'file is required') + + file_bytes = file.read() + try: + with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf: + names = [name for name in zf.namelist() if not name.endswith('/')] + manifest_name = next( + ( + name + for name in names + if name.replace('\\', '/').strip('/').lower() in ('manifest.yaml', 'manifest.yml') + ), + None, + ) + if manifest_name is None: + return self.http_status(400, -1, 'manifest.yaml is required') + + manifest = yaml.safe_load(zf.read(manifest_name).decode('utf-8')) or {} + requirements: list[str] = [] + requirements_name = next( + (name for name in names if name.replace('\\', '/').strip('/').lower() == 'requirements.txt'), + None, + ) + if requirements_name is not None: + requirements = [ + line.strip() + for line in zf.read(requirements_name).decode('utf-8', errors='ignore').splitlines() + if line.strip() and not line.strip().startswith('#') + ] + + spec = manifest.get('spec') or {} + components = spec.get('components') or {} + component_counts = self._count_plugin_components(components, names) + component_types = list(component_counts.keys()) + + return self.success( + data={ + 'filename': file.filename or 'local plugin', + 'size': len(file_bytes), + 'manifest': manifest, + 'metadata': manifest.get('metadata') or {}, + 'component_types': component_types, + 'component_counts': component_counts, + 'requirements': requirements, + 'file_count': len(names), + } + ) + except zipfile.BadZipFile: + return 
self.http_status(400, -1, 'invalid .lbpkg file') + except Exception as exc: + return self.http_status(500, -1, f'Failed to preview plugin package: {exc}') + @self.route('/config-files', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) async def _() -> str: """Upload a file for plugin configuration""" diff --git a/src/langbot/pkg/api/http/controller/groups/resources/mcp.py b/src/langbot/pkg/api/http/controller/groups/resources/mcp.py index ac91abffd..e6bc2e77d 100644 --- a/src/langbot/pkg/api/http/controller/groups/resources/mcp.py +++ b/src/langbot/pkg/api/http/controller/groups/resources/mcp.py @@ -31,6 +31,9 @@ async def _() -> str: @self.route('/servers/', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN) async def _(server_name: str) -> str: """获取、更新或删除MCP服务器配置""" + from urllib.parse import unquote + + server_name = unquote(server_name) server_data = await self.ap.mcp_service.get_mcp_server_by_name(server_name) if server_data is None: @@ -57,6 +60,9 @@ async def _(server_name: str) -> str: @self.route('/servers//test', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) async def _(server_name: str) -> str: """测试MCP服务器连接""" + from urllib.parse import unquote + + server_name = unquote(server_name) server_data = await quart.request.json task_id = await self.ap.mcp_service.test_mcp_server(server_name=server_name, server_data=server_data) return self.success(data={'task_id': task_id}) diff --git a/src/langbot/pkg/api/http/controller/groups/skills.py b/src/langbot/pkg/api/http/controller/groups/skills.py new file mode 100644 index 000000000..946741d76 --- /dev/null +++ b/src/langbot/pkg/api/http/controller/groups/skills.py @@ -0,0 +1,190 @@ +from __future__ import annotations + +import quart + +from langbot_plugin.box.errors import BoxError + +from .. 
import group + + +@group.group_class('skills', '/api/v1/skills') +class SkillsRouterGroup(group.RouterGroup): + """Skills management API endpoints.""" + + async def initialize(self) -> None: + @self.route('', methods=['GET', 'POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def list_or_create_skills() -> quart.Response: + if quart.request.method == 'GET': + try: + skills = await self.ap.skill_service.list_skills() + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + return self.success(data={'skills': skills}) + + data = await quart.request.json + if 'name' not in data or not data['name']: + return self.http_status(400, -1, 'Missing required field: name') + + try: + skill = await self.ap.skill_service.create_skill(data) + return self.success(data={'skill': skill}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + @self.route('/', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def get_update_delete_skill(skill_name: str) -> quart.Response: + if quart.request.method == 'GET': + try: + skill = await self.ap.skill_service.get_skill(skill_name) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + if not skill: + return self.http_status(404, -1, 'Skill not found') + return self.success(data={'skill': skill}) + + if quart.request.method == 'PUT': + data = await quart.request.json + try: + skill = await self.ap.skill_service.update_skill(skill_name, data) + return self.success(data={'skill': skill}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + try: + await self.ap.skill_service.delete_skill(skill_name) + return self.success() + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + @self.route('//files', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def list_skill_files(skill_name: str) -> quart.Response: + 
"""List files in skill package directory.""" + path = quart.request.args.get('path', '.').strip() + include_hidden = quart.request.args.get('include_hidden', 'false').lower() == 'true' + + try: + result = await self.ap.skill_service.list_skill_files( + skill_name, + path=path, + include_hidden=include_hidden, + ) + return self.success(data=result) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + @self.route( + '//files/', methods=['GET', 'PUT'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY + ) + async def read_or_write_skill_file(skill_name: str, path: str) -> quart.Response: + """Read or write a file in skill package.""" + if quart.request.method == 'GET': + try: + result = await self.ap.skill_service.read_skill_file(skill_name, path) + return self.success(data=result) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + # PUT - write file + data = await quart.request.json + content = data.get('content', '') + if content is None: + return self.http_status(400, -1, 'Missing required field: content') + + try: + result = await self.ap.skill_service.write_skill_file(skill_name, path, content) + return self.success(data=result) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + + @self.route('//preview', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def preview_skill(skill_name: str) -> quart.Response: + skill = self.ap.skill_mgr.get_skill_by_name(skill_name) + if not skill: + return self.http_status(404, -1, 'Skill not found') + return self.success(data={'instructions': skill.get('instructions', '')}) + + @self.route('/install/github', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def install_skill_from_github() -> quart.Response: + data = await quart.request.json + required_fields = ['asset_url', 'owner', 'repo'] + for field in required_fields: + if field not in data or not data[field]: + return 
self.http_status(400, -1, f'Missing required field: {field}') + asset_url = str(data['asset_url']).strip().lower().split('?', 1)[0].split('#', 1)[0] + if not asset_url.endswith('skill.md') and not data.get('release_tag'): + return self.http_status(400, -1, 'Missing required field: release_tag') + + try: + skill = await self.ap.skill_service.install_from_github(data) + return self.success(data={'skills': skill}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + except Exception as exc: + return self.http_status(500, -1, f'Failed to install skill: {exc}') + + @self.route('/install/github/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def preview_skill_from_github() -> quart.Response: + data = await quart.request.json + required_fields = ['asset_url', 'owner', 'repo'] + for field in required_fields: + if field not in data or not data[field]: + return self.http_status(400, -1, f'Missing required field: {field}') + asset_url = str(data['asset_url']).strip().lower().split('?', 1)[0].split('#', 1)[0] + if not asset_url.endswith('skill.md') and not data.get('release_tag'): + return self.http_status(400, -1, 'Missing required field: release_tag') + + try: + preview = await self.ap.skill_service.preview_install_from_github(data) + return self.success(data={'skills': preview}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + except Exception as exc: + return self.http_status(500, -1, f'Failed to preview skill: {exc}') + + @self.route('/install/upload', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def install_skill_from_upload() -> quart.Response: + file = (await quart.request.files).get('file') + if file is None: + return self.http_status(400, -1, 'file is required') + form = await quart.request.form + + try: + skill = await self.ap.skill_service.install_from_zip_upload( + file_bytes=file.read(), + filename=file.filename or '', + 
source_paths=form.getlist('source_paths'), + ) + return self.success(data={'skills': skill}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + except Exception as exc: + return self.http_status(500, -1, f'Failed to install skill: {exc}') + + @self.route('/install/upload/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def preview_skill_from_upload() -> quart.Response: + file = (await quart.request.files).get('file') + if file is None: + return self.http_status(400, -1, 'file is required') + + try: + preview = await self.ap.skill_service.preview_install_from_zip_upload( + file_bytes=file.read(), + filename=file.filename or '', + ) + return self.success(data={'skills': preview}) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) + except Exception as exc: + return self.http_status(500, -1, f'Failed to preview skill: {exc}') + + @self.route('/scan', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def scan_skill_directory() -> quart.Response: + path = quart.request.args.get('path', '').strip() + if not path: + return self.http_status(400, -1, 'Missing required parameter: path') + + try: + result = await self.ap.skill_service.scan_directory_async(path) + return self.success(data=result) + except (ValueError, BoxError) as exc: + return self.http_status(400, -1, str(exc)) diff --git a/src/langbot/pkg/api/http/service/pipeline.py b/src/langbot/pkg/api/http/service/pipeline.py index 9175aba55..d7685fe43 100644 --- a/src/langbot/pkg/api/http/service/pipeline.py +++ b/src/langbot/pkg/api/http/service/pipeline.py @@ -215,6 +215,8 @@ async def update_pipeline_extensions( bound_mcp_servers: list[str] = None, enable_all_plugins: bool = True, enable_all_mcp_servers: bool = True, + bound_skills: list[str] = None, + enable_all_skills: bool = True, ) -> None: """Update the bound plugins and MCP servers for a pipeline""" # Get current pipeline @@ -232,9 
+234,12 @@ async def update_pipeline_extensions( extensions_preferences = pipeline.extensions_preferences or {} extensions_preferences['enable_all_plugins'] = enable_all_plugins extensions_preferences['enable_all_mcp_servers'] = enable_all_mcp_servers + extensions_preferences['enable_all_skills'] = enable_all_skills extensions_preferences['plugins'] = bound_plugins if bound_mcp_servers is not None: extensions_preferences['mcp_servers'] = bound_mcp_servers + if bound_skills is not None: + extensions_preferences['skills'] = bound_skills await self.ap.persistence_mgr.execute_async( sqlalchemy.update(persistence_pipeline.LegacyPipeline) diff --git a/src/langbot/pkg/api/http/service/skill.py b/src/langbot/pkg/api/http/service/skill.py new file mode 100644 index 000000000..94b926975 --- /dev/null +++ b/src/langbot/pkg/api/http/service/skill.py @@ -0,0 +1,428 @@ +from __future__ import annotations + +import io +import inspect +import os +import posixpath +import zipfile +from typing import Optional +from urllib.parse import quote, unquote, urlparse + +import httpx + +from ....core import app +from ....skill.utils import parse_frontmatter + + +_PUBLIC_SKILL_FIELDS = ( + 'name', + 'display_name', + 'description', + 'instructions', + 'package_root', + 'created_at', + 'updated_at', +) + +_GITHUB_ASSET_HOSTS = { + 'github.com', + 'api.github.com', + 'objects.githubusercontent.com', + 'githubusercontent.com', + 'raw.githubusercontent.com', + 'codeload.github.com', +} + + +class SkillService: + """Filesystem-backed skill management service.""" + + ap: app.Application + + def __init__(self, ap: app.Application) -> None: + self.ap = ap + + def _box_service(self): + box_service = getattr(self.ap, 'box_service', None) + if box_service is not None and getattr(box_service, 'available', False): + return box_service + return None + + def _require_box(self, action: str): + """Return the Box service or raise if it is not available. + + Box is the only source of truth for skills. 
Every read and write + operation goes through it — there is no local-filesystem fallback. + """ + box_service = self._box_service() + if box_service is not None: + return box_service + ap_box = getattr(self.ap, 'box_service', None) + if ap_box is None: + reason = 'not initialised' + elif not getattr(ap_box, 'enabled', True): + reason = 'disabled in config (box.enabled = false)' + else: + connector_error = getattr(ap_box, '_connector_error', '') or 'currently unavailable' + reason = f'unavailable: {connector_error}' + raise ValueError( + f'{action} requires the Box runtime, which is {reason}. ' + f'Enable Box in config.yaml (box.enabled = true) and ensure the ' + f'runtime is reachable before retrying.' + ) + + def _require_box_for_write(self, action: str) -> None: + """Backwards-compatible alias preserved for clarity at call sites.""" + self._require_box(action) + + @staticmethod + def _serialize_skill(skill: dict) -> dict: + return {field: skill.get(field) for field in _PUBLIC_SKILL_FIELDS if field in skill} + + async def list_skills(self) -> list[dict]: + # When Box is unavailable, surface an empty list rather than raising — + # the skills page should render cleanly, and the UI separately renders + # a "Box disabled / unavailable" banner via useBoxStatus. 
+ box_service = self._box_service() + if box_service is None: + return [] + return [self._serialize_skill(skill) for skill in await box_service.list_skills()] + + async def get_skill(self, skill_name: str) -> Optional[dict]: + box_service = self._box_service() + if box_service is None: + return None + skill = await box_service.get_skill(skill_name) + return self._serialize_skill(skill) if skill else None + + async def get_skill_by_name(self, name: str) -> Optional[dict]: + return await self.get_skill(name) + + async def create_skill(self, data: dict) -> dict: + box_service = self._require_box('Creating a skill') + created = await box_service.create_skill(data) + await self._reload_skills() + return self._serialize_skill(created) + + async def update_skill(self, skill_name: str, data: dict) -> dict: + box_service = self._require_box('Editing a skill') + updated = await box_service.update_skill(skill_name, data) + await self._reload_skills() + return self._serialize_skill(updated) + + async def delete_skill(self, skill_name: str) -> bool: + box_service = self._require_box('Deleting a skill') + await box_service.delete_skill(skill_name) + await self._reload_skills() + return True + + async def list_skill_files( + self, + skill_name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + box_service = self._require_box('Browsing skill files') + return await box_service.list_skill_files(skill_name, path, include_hidden, max_entries) + + async def read_skill_file(self, skill_name: str, path: str) -> dict: + box_service = self._require_box('Reading a skill file') + return await box_service.read_skill_file(skill_name, path) + + async def write_skill_file(self, skill_name: str, path: str, content: str) -> dict: + box_service = self._require_box('Editing skill files') + result = await box_service.write_skill_file(skill_name, path, content) + await self._reload_skills() + return result + + async def install_from_github(self, data: 
dict) -> list[dict]: + box_service = self._require_box('Installing a skill from GitHub') + owner = str(data['owner']).strip() + repo = str(data['repo']).strip() + release_tag = str(data.get('release_tag', '')).strip() + raw_asset_url = str(data['asset_url']).strip() + if self._is_github_skill_md_url(raw_asset_url): + return await self._install_github_skill_md(raw_asset_url, owner=owner, repo=repo, data=data) + + asset_url = self._validate_github_asset_url(raw_asset_url, owner=owner, repo=repo, release_tag=release_tag) + source_subdir = str(data.get('source_subdir', '') or '').strip() + + zip_bytes = await self._download_github_asset(asset_url) + filename = f'{repo}-{release_tag.lstrip("v").replace("/", "-") or "source"}.zip' + installed = await box_service.install_skill_zip( + zip_bytes, + filename, + source_paths=data.get('source_paths') or [], + source_path=str(data.get('source_path', '') or ''), + source_subdir=source_subdir, + ) + await self._reload_skills() + return [self._serialize_skill(skill) for skill in installed] + + async def preview_install_from_github(self, data: dict) -> list[dict]: + box_service = self._require_box('Previewing a skill from GitHub') + owner = str(data['owner']).strip() + repo = str(data['repo']).strip() + release_tag = str(data.get('release_tag', '')).strip() + raw_asset_url = str(data['asset_url']).strip() + if self._is_github_skill_md_url(raw_asset_url): + return await self._preview_github_skill_md(raw_asset_url, owner=owner, repo=repo) + + asset_url = self._validate_github_asset_url(raw_asset_url, owner=owner, repo=repo, release_tag=release_tag) + source_subdir = str(data.get('source_subdir', '') or '').strip() + + zip_bytes = await self._download_github_asset(asset_url) + return await box_service.preview_skill_zip( + zip_bytes, + f'{repo}-{release_tag.lstrip("v").replace("/", "-") or "source"}.zip', + source_subdir=source_subdir, + ) + + async def install_from_zip_upload( + self, + *, + file_bytes: bytes, + filename: str, + 
source_paths: list[str] | None = None, + source_path: str = '', + ) -> list[dict]: + box_service = self._require_box('Installing a skill from upload') + installed = await box_service.install_skill_zip( + file_bytes, + filename, + source_paths=source_paths or [], + source_path=source_path, + ) + await self._reload_skills() + return [self._serialize_skill(skill) for skill in installed] + + async def preview_install_from_zip_upload(self, *, file_bytes: bytes, filename: str) -> list[dict]: + box_service = self._require_box('Previewing a skill upload') + return await box_service.preview_skill_zip(file_bytes, filename) + + async def _install_github_skill_md(self, asset_url: str, *, owner: str, repo: str, data: dict) -> list[dict]: + box_service = self._require_box('Installing a skill from GitHub') + zip_bytes, filename, _package_name = await self._download_github_skill_directory_as_zip( + asset_url, + owner=owner, + repo=repo, + ) + + installed = await box_service.install_skill_zip( + zip_bytes, + filename, + source_paths=data.get('source_paths') or [], + source_path=str(data.get('source_path', '') or ''), + target_suffix='', + ) + await self._reload_skills() + return [self._serialize_skill(skill) for skill in installed] + + async def _preview_github_skill_md(self, asset_url: str, *, owner: str, repo: str) -> list[dict]: + box_service = self._require_box('Previewing a skill from GitHub') + zip_bytes, _filename, package_name = await self._download_github_skill_directory_as_zip( + asset_url, + owner=owner, + repo=repo, + ) + return await box_service.preview_skill_zip(zip_bytes, f'{package_name}.zip', target_suffix='') + + async def reload_skills(self) -> list[dict]: + await self._reload_skills() + return await self.list_skills() + + async def scan_directory_async(self, path: str) -> dict: + box_service = self._require_box('Scanning a skill directory') + return await box_service.scan_skill_directory(path) + + async def _reload_skills(self) -> None: + skill_mgr = 
getattr(self.ap, 'skill_mgr', None) + reload_skills = getattr(skill_mgr, 'reload_skills', None) + if not callable(reload_skills): + return + result = reload_skills() + if inspect.isawaitable(result): + await result + + async def _download_github_asset(self, asset_url: str) -> bytes: + async with httpx.AsyncClient(follow_redirects=True, timeout=120) as client: + resp = await client.get(asset_url) + resp.raise_for_status() + return resp.content + + async def _download_github_skill_directory_as_zip( + self, asset_url: str, *, owner: str, repo: str + ) -> tuple[bytes, str, str]: + info = self._parse_github_skill_md_url(asset_url, owner=owner, repo=repo) + archive_url = f'https://codeload.github.com/{owner}/{repo}/zip/{quote(info["ref"], safe="/")}' + archive_bytes = await self._download_github_asset(archive_url) + + try: + source_archive = zipfile.ZipFile(io.BytesIO(archive_bytes), 'r') + except zipfile.BadZipFile as exc: + raise ValueError('GitHub repository archive must be a valid .zip archive') from exc + + with source_archive as source_zip: + skill_entry = self._find_github_skill_archive_entry(source_zip, info['file_path']) + try: + skill_md_content = source_zip.read(skill_entry).decode('utf-8') + except UnicodeDecodeError as exc: + raise ValueError('GitHub SKILL.md must be valid UTF-8 text') from exc + + package_name = self._resolve_github_skill_md_package_name(skill_md_content, info['package_name']) + source_skill_dir = posixpath.dirname(posixpath.normpath(skill_entry.filename)) + + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as target_zip: + self._copy_github_skill_directory_to_zip(source_zip, target_zip, source_skill_dir, package_name) + return buffer.getvalue(), f'{package_name}.zip', package_name + + def _find_github_skill_archive_entry(self, archive: zipfile.ZipFile, file_path: str) -> zipfile.ZipInfo: + normalized_file_path = posixpath.normpath(file_path).lower() + for member in archive.infolist(): + if member.is_dir(): + 
continue + normalized_member = posixpath.normpath(member.filename) + path_parts = normalized_member.split('/', 1) + if len(path_parts) != 2: + continue + archive_relative_path = path_parts[1].lower() + if archive_relative_path == normalized_file_path: + return member + raise ValueError(f'GitHub archive does not contain requested SKILL.md: {file_path}') + + def _copy_github_skill_directory_to_zip( + self, + source_zip: zipfile.ZipFile, + target_zip: zipfile.ZipFile, + source_skill_dir: str, + package_name: str, + ) -> None: + normalized_source_dir = posixpath.normpath(source_skill_dir) + source_prefix = f'{normalized_source_dir}/' + copied_files = 0 + + for member in source_zip.infolist(): + normalized_member = posixpath.normpath(member.filename) + if normalized_member != normalized_source_dir and not normalized_member.startswith(source_prefix): + continue + + relative_path = posixpath.relpath(normalized_member, normalized_source_dir) + if relative_path in ('', '.'): + continue + if relative_path.startswith('../') or relative_path == '..' 
or posixpath.isabs(relative_path): + raise ValueError(f'GitHub archive contains an unsafe skill path: {member.filename}') + + target_name = f'{package_name}/{relative_path}' + if member.is_dir() and not target_name.endswith('/'): + target_name = f'{target_name}/' + target_info = zipfile.ZipInfo(target_name, date_time=member.date_time) + target_info.external_attr = member.external_attr + target_info.compress_type = zipfile.ZIP_DEFLATED + + if member.is_dir(): + target_zip.writestr(target_info, b'') + continue + + target_zip.writestr(target_info, source_zip.read(member)) + copied_files += 1 + + if copied_files == 0: + raise ValueError('GitHub skill directory is empty') + + def _uploaded_skill_target_stem(self, filename: str) -> str: + stem = os.path.splitext(os.path.basename(str(filename or '').strip()))[0] + safe_stem = ''.join(ch if ch.isalnum() or ch in ('-', '_') else '-' for ch in stem).strip('-_') + if not safe_stem: + safe_stem = 'uploaded-skill' + return safe_stem + + @staticmethod + def _is_github_skill_md_url(asset_url: str) -> bool: + parsed = urlparse(str(asset_url or '').strip()) + normalized_path = posixpath.normpath(parsed.path or '/') + return normalized_path.lower().endswith('/skill.md') + + def _parse_github_skill_md_url(self, asset_url: str, *, owner: str, repo: str) -> dict: + parsed = urlparse(str(asset_url or '').strip()) + if parsed.scheme != 'https' or not parsed.netloc: + raise ValueError('asset_url must be a valid HTTPS GitHub SKILL.md URL') + + host = parsed.netloc.lower() + path_parts = [unquote(part) for part in (parsed.path or '').split('/') if part] + if host == 'github.com': + if ( + len(path_parts) < 5 + or path_parts[0] != owner + or path_parts[1] != repo + or path_parts[2] + not in ( + 'blob', + 'raw', + ) + ): + raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo blob path') + ref = path_parts[3] + file_path = '/'.join(path_parts[4:]) + elif host == 'raw.githubusercontent.com': + if len(path_parts) < 4 or 
path_parts[0] != owner or path_parts[1] != repo: + raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo raw path') + ref = path_parts[2] + file_path = '/'.join(path_parts[3:]) + else: + raise ValueError('asset_url must point to a GitHub SKILL.md file') + + normalized_file_path = posixpath.normpath(file_path) + normalized_file_path_lower = normalized_file_path.lower() + if normalized_file_path_lower != 'skill.md' and not normalized_file_path_lower.endswith('/skill.md'): + raise ValueError('GitHub skill import requires a URL ending with SKILL.md') + + parent_dir = posixpath.basename(posixpath.dirname(normalized_file_path)) or repo + return { + 'ref': ref, + 'file_path': normalized_file_path, + 'package_name': self._uploaded_skill_target_stem(parent_dir), + } + + def _resolve_github_skill_md_package_name(self, content: str, fallback: str) -> str: + metadata, _instructions = parse_frontmatter(content) + candidate = str(metadata.get('name') or fallback or '').strip() + try: + return self._validate_skill_name(candidate) + except ValueError: + return self._validate_skill_name(fallback) + + @staticmethod + def _validate_github_asset_url(asset_url: str, *, owner: str, repo: str, release_tag: str) -> str: + parsed = urlparse(str(asset_url).strip()) + if parsed.scheme != 'https' or not parsed.netloc: + raise ValueError('asset_url must be a valid HTTPS GitHub asset URL') + + host = parsed.netloc.lower() + if host not in _GITHUB_ASSET_HOSTS: + raise ValueError('asset_url must point to a GitHub-hosted release asset or archive') + + normalized_path = posixpath.normpath(parsed.path or '/') + allowed_prefixes = [ + f'/repos/{owner}/{repo}/', + f'/{owner}/{repo}/', + ] + if not any(normalized_path.startswith(prefix) for prefix in allowed_prefixes): + raise ValueError('asset_url does not match the requested owner/repo') + + if release_tag and release_tag not in parsed.path and release_tag not in parsed.query: + raise ValueError('asset_url does not match the 
requested release_tag') + + return parsed.geturl() + + @staticmethod + def _validate_skill_name(name: str) -> str: + name = str(name or '').strip() + if not name: + raise ValueError('Skill name is required') + if not name.replace('-', '').replace('_', '').isalnum(): + raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores') + if len(name) > 64: + raise ValueError('Skill name cannot exceed 64 characters') + return name diff --git a/src/langbot/pkg/box/__init__.py b/src/langbot/pkg/box/__init__.py new file mode 100644 index 000000000..de6394177 --- /dev/null +++ b/src/langbot/pkg/box/__init__.py @@ -0,0 +1,5 @@ +"""LangBot Box runtime package.""" + +from .workspace import BoxWorkspaceSession + +__all__ = ['BoxWorkspaceSession'] diff --git a/src/langbot/pkg/box/connector.py b/src/langbot/pkg/box/connector.py new file mode 100644 index 000000000..deda0b89f --- /dev/null +++ b/src/langbot/pkg/box/connector.py @@ -0,0 +1,354 @@ +from __future__ import annotations + +import asyncio +import json +import os +import sys +import typing +from typing import TYPE_CHECKING +from urllib.parse import urlparse + +from langbot_plugin.entities.io.actions.enums import CommonAction +from langbot_plugin.runtime.io.handler import Handler +from langbot_plugin.runtime.io.connection import Connection + +from langbot_plugin.box.client import ActionRPCBoxClient +from langbot_plugin.box.errors import BoxRuntimeUnavailableError +from langbot_plugin.box.actions import LangBotToBoxAction + +from ..utils import platform +from ..utils.managed_runtime import ManagedRuntimeConnector + +if TYPE_CHECKING: + from ..core import app as core_app + + +# Default Docker Compose service name for the standalone Box container. +_DOCKER_BOX_HOST = 'langbot_box' +_DEFAULT_PORT = 5410 + +_HEARTBEAT_INTERVAL_SEC = 20 + +# Top-level keys under ``box`` that are LangBot-internal and should not be +# forwarded to the Box runtime. 
+_INTERNAL_BOX_CONFIG_KEYS = frozenset({'runtime'}) + + +def _get_box_config(ap) -> dict: + """Return the 'box' section from instance config. + + Environment-variable overrides are handled uniformly by + ``LoadConfigStage._apply_env_overrides_to_config`` using the + ``SECTION__SUBSECTION__KEY`` convention (e.g. ``BOX__LOCAL__HOST_ROOT``, + ``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"``) before this is read, so no + box-specific env parsing is needed here. + """ + instance_config = getattr(ap, 'instance_config', None) + config_data = getattr(instance_config, 'data', {}) if instance_config is not None else {} + return dict(config_data.get('box', {}) or {}) + + +def _get_runtime_endpoint(box_cfg: dict) -> str: + runtime_cfg = box_cfg.get('runtime') or {} + return str(runtime_cfg.get('endpoint', '')).strip() + + +def _filter_config_for_runtime(box_cfg: dict) -> dict: + return {k: v for k, v in box_cfg.items() if k not in _INTERNAL_BOX_CONFIG_KEYS} + + +def resolve_box_ws_relay_url(ap: core_app.Application) -> str: + """Derive the WS relay base URL used for managed-process attach. + + The WS relay serves the ``/v1/sessions/{id}/managed-process/ws`` endpoint + on the *relay* port (default 5410). + """ + box_cfg = _get_box_config(ap) + + # Explicit runtime endpoint takes precedence. The config value is a base + # URL; endpoint-specific paths are appended by the SDK client. + endpoint = _get_runtime_endpoint(box_cfg) + if endpoint: + parsed = urlparse(endpoint) + scheme = parsed.scheme or 'ws' + if scheme == 'ws': + scheme = 'http' + elif scheme == 'wss': + scheme = 'https' + host = parsed.hostname or '127.0.0.1' + port = parsed.port or _DEFAULT_PORT + return f'{scheme}://{host}:{port}' + + # In Docker, relay lives on the box runtime container. 
+ if platform.get_platform() == 'docker': + return f'http://{_DOCKER_BOX_HOST}:{_DEFAULT_PORT}' + + return f'http://127.0.0.1:{_DEFAULT_PORT}' + + +class BoxRuntimeConnector(ManagedRuntimeConnector): + """Connect to the Box runtime via action RPC. + + Transport decision (mirrors Plugin runtime logic): + 1. Docker / --standalone-box / explicit runtime.endpoint -> WebSocket to external Box process + 2. Windows (non-Docker) -> subprocess + WebSocket (Windows lacks async stdio pipe) + 3. Unix / macOS -> subprocess + stdio pipe + """ + + def __init__( + self, + ap: core_app.Application, + runtime_disconnect_callback: typing.Callable[ + ['BoxRuntimeConnector'], typing.Coroutine[typing.Any, typing.Any, None] + ] + | None = None, + ): + super().__init__(ap) + self.runtime_disconnect_callback = runtime_disconnect_callback + self.configured_runtime_endpoint = self._load_configured_runtime_endpoint() + self.ws_relay_base_url = resolve_box_ws_relay_url(ap) + self.client = ActionRPCBoxClient(logger=ap.logger) + + self._handler: Handler | None = None + self._handler_task: asyncio.Task | None = None + self._ctrl_task: asyncio.Task | None = None + self._heartbeat_task: asyncio.Task | None = None + + # Parse the relay URL once for reuse. + parsed = urlparse(self.ws_relay_base_url) + self._relay_host = parsed.hostname or '127.0.0.1' + self._relay_port = parsed.port or _DEFAULT_PORT + self._filtered_box_config = _filter_config_for_runtime(_get_box_config(ap)) + + def _uses_websocket(self) -> bool: + """Whether the connector should use WebSocket to reach the Box runtime. 
+ + True when: + - Running inside Docker (Box runtime is a separate container) + - The ``--standalone-box`` CLI flag was passed + - An explicit ``runtime.endpoint`` was configured + """ + return bool( + self.configured_runtime_endpoint + or platform.get_platform() == 'docker' + or platform.use_websocket_to_connect_box_runtime() + ) + + async def initialize(self) -> None: + if self._uses_websocket(): + if platform.get_platform() == 'win32' and not self.configured_runtime_endpoint: + await self._start_subprocess_then_ws() + else: + await self._connect_remote_ws() + else: + await self._start_local_stdio() + + # Start heartbeat after successful connection + if self._heartbeat_task is None: + self._heartbeat_task = asyncio.create_task(self._heartbeat_loop()) + + # -- heartbeat ----------------------------------------------------------- + + async def _heartbeat_loop(self) -> None: + """Periodically ping the Box runtime to detect silent disconnections.""" + while True: + await asyncio.sleep(_HEARTBEAT_INTERVAL_SEC) + try: + await self.ping() + self.ap.logger.debug('Heartbeat to Box runtime success.') + except Exception as e: + self.ap.logger.debug(f'Failed to heartbeat to Box runtime: {e}') + + async def ping(self) -> None: + if self._handler is None: + raise BoxRuntimeUnavailableError('Box runtime is not connected') + await self._handler.call_action(CommonAction.PING, {}) + + # -- transport paths ----------------------------------------------------- + + async def _start_local_stdio(self) -> None: + """Launch box server as subprocess and connect via stdio (Unix/macOS).""" + from langbot_plugin.runtime.io.controllers.stdio.client import StdioClientController + + self.ap.logger.info('Use stdio to connect to box runtime') + python_path = sys.executable + env = os.environ.copy() + if self._filtered_box_config: + env['LANGBOT_BOX_CONFIG'] = json.dumps(self._filtered_box_config) + + connected = asyncio.Event() + connect_error: list[Exception] = [] + + ctrl = 
StdioClientController( + command=python_path, + # Launched through the same CLI entry point as the plugin runtime + # (cli.__init__ ); `-s` selects the stdio transport, + # mirroring `rt -s`. + args=['-m', 'langbot_plugin.cli.__init__', 'box', '-s', '--ws-control-port', str(self._relay_port)], + env=env, + ) + self._ctrl_task = asyncio.create_task( + ctrl.run(self._make_connection_callback('stdio', connected, connect_error)) + ) + + try: + await asyncio.wait_for(connected.wait(), timeout=30.0) + except asyncio.TimeoutError: + raise BoxRuntimeUnavailableError('box runtime subprocess did not connect in time') + + if connect_error: + raise BoxRuntimeUnavailableError(f'box runtime connection failed: {connect_error[0]}') + + self._subprocess = ctrl.process + + async def _start_subprocess_then_ws(self) -> None: + """Launch box server as detached subprocess, then connect via WS (Windows).""" + self.ap.logger.info('(windows) Use cmd to launch box runtime and communicate via ws') + + env = os.environ.copy() + if self._filtered_box_config: + env['LANGBOT_BOX_CONFIG'] = json.dumps(self._filtered_box_config) + + python_path = sys.executable + # Launched through the same CLI entry point as the plugin runtime + # (cli.__init__ ); no flag => WebSocket transport. 
+ self.runtime_subprocess = await asyncio.create_subprocess_exec( + python_path, + '-m', + 'langbot_plugin.cli.__init__', + 'box', + '--ws-control-port', + str(self._relay_port), + env=env, + ) + self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait()) + + ws_url = f'ws://localhost:{self._relay_port}/rpc/ws' + await self._connect_ws(ws_url, '(windows) WebSocket') + + async def _connect_remote_ws(self) -> None: + """Connect to a remote (or Docker) box server via WebSocket.""" + ws_url = self._resolve_rpc_ws_url() + self.ap.logger.info(f'Use WebSocket to connect to box runtime ({ws_url})') + await self._connect_ws(ws_url, 'WebSocket') + + # -- helpers ------------------------------------------------------------- + + def _resolve_rpc_ws_url(self) -> str: + """Determine the action-RPC WebSocket URL. + + All endpoints share a single port; action RPC is at ``/rpc/ws``. + """ + if self.configured_runtime_endpoint: + base = self.configured_runtime_endpoint.rstrip('/') + parsed = urlparse(base) + scheme = parsed.scheme or 'ws' + if scheme in ('http', 'https'): + scheme = 'wss' if scheme == 'https' else 'ws' + host = parsed.hostname or '127.0.0.1' + port = parsed.port or _DEFAULT_PORT + return f'{scheme}://{host}:{port}/rpc/ws' + + if platform.get_platform() == 'docker': + return f'ws://{_DOCKER_BOX_HOST}:{_DEFAULT_PORT}/rpc/ws' + + return f'ws://localhost:{self._relay_port}/rpc/ws' + + async def _connect_ws(self, ws_url: str, transport_name: str) -> None: + """Shared WebSocket connection procedure.""" + from langbot_plugin.runtime.io.controllers.ws.client import WebSocketClientController + + connected = asyncio.Event() + connect_error: list[Exception] = [] + + async def on_connect_failed(ctrl, exc): + if exc is not None: + self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}): {exc}') + else: + self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}), trying to reconnect...') + connect_error.append(exc or 
BoxRuntimeUnavailableError('ws connection failed')) + connected.set() + if self.runtime_disconnect_callback is not None: + await self.runtime_disconnect_callback(self) + + ctrl = WebSocketClientController(ws_url=ws_url, make_connection_failed_callback=on_connect_failed) + self._ctrl_task = asyncio.create_task( + ctrl.run(self._make_connection_callback(transport_name, connected, connect_error)) + ) + + try: + await asyncio.wait_for(connected.wait(), timeout=30.0) + except asyncio.TimeoutError: + raise BoxRuntimeUnavailableError(f'box runtime ws connection timed out ({ws_url})') + + if connect_error: + raise BoxRuntimeUnavailableError(f'box runtime connection failed: {connect_error[0]}') + + def _make_connection_callback( + self, + transport_name: str, + connected: asyncio.Event, + connect_error: list[Exception], + ): + async def new_connection_callback(connection: Connection) -> None: + handler = Handler(connection) + self._handler = handler + self.client.set_handler(handler) + self._handler_task = asyncio.create_task(handler.run()) + try: + await handler.call_action(CommonAction.PING, {}) + if self._filtered_box_config: + await handler.call_action(LangBotToBoxAction.INIT, self._filtered_box_config) + self.ap.logger.debug('Sent box configuration to Box runtime via INIT.') + self.ap.logger.info(f'Connected to Box runtime via {transport_name}.') + connected.set() + await self._handler_task + except Exception as exc: + if not connected.is_set(): + connect_error.append(exc) + connected.set() + return + + # If we reach here, handler.run() returned normally (connection + # closed) or raised after the initial handshake succeeded. + # Either way, treat it as a disconnect. + if connected.is_set(): + if self._uses_websocket(): + self.ap.logger.error('Disconnected from Box runtime, trying to reconnect...') + if self.runtime_disconnect_callback is not None: + await self.runtime_disconnect_callback(self) + else: + self.ap.logger.error( + 'Disconnected from Box runtime via stdio. 
' + 'Cannot automatically reconnect — please restart LangBot.' + ) + + return new_connection_callback + + # -- lifecycle ----------------------------------------------------------- + + def dispose(self) -> None: + if self._heartbeat_task is not None: + self._heartbeat_task.cancel() + self._heartbeat_task = None + + if self._handler_task is not None: + self._handler_task.cancel() + self._handler_task = None + + if self._ctrl_task is not None: + self._ctrl_task.cancel() + self._ctrl_task = None + + # stdio-managed subprocess (stored as self._subprocess by _start_local_stdio) + if hasattr(self, '_subprocess') and self._subprocess is not None and self._subprocess.returncode is None: + self.ap.logger.info('Terminating managed box runtime process...') + self._subprocess.terminate() + + # Subprocess launched by ManagedRuntimeConnector._start_runtime_subprocess (Windows path) + self._dispose_subprocess() + + # -- config helpers ------------------------------------------------------ + + def _load_configured_runtime_endpoint(self) -> str: + return _get_runtime_endpoint(_get_box_config(self.ap)) diff --git a/src/langbot/pkg/box/policy.py b/src/langbot/pkg/box/policy.py new file mode 100644 index 000000000..15f4c45c9 --- /dev/null +++ b/src/langbot/pkg/box/policy.py @@ -0,0 +1,98 @@ +"""Three-layer security policy for LangBot Box. + +The design separates concerns into three independent layers, aligned with +OpenCode / OpenClaw patterns: + +1. **SandboxPolicy** – *where* tools run (host vs sandbox). +2. **ToolPolicy** – *which* tools are allowed (allow/deny lists). +3. **ElevatedPolicy** – *whether* a single exec call may temporarily + escape the default sandbox boundary. + +These three layers are orthogonal: +- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool. +- SandboxPolicy decides the default execution location. +- ElevatedPolicy only affects ``exec`` and only when the framework allows it. 
+""" + +from __future__ import annotations + +import enum +from typing import Sequence + + +# ── Layer 1: Sandbox Policy ────────────────────────────────────────── + + +class SandboxMode(str, enum.Enum): + """Determines when agent execution is routed through the sandbox.""" + + OFF = 'off' + """Sandbox disabled; all exec runs on the host.""" + + NON_DEFAULT = 'non_default' + """Only non-default sessions are sandboxed (e.g. sub-agents, MCP).""" + + ALL = 'all' + """Every agent exec call is routed through the sandbox.""" + + +class SandboxPolicy: + """Decides whether a given execution context should use the sandbox.""" + + def __init__(self, mode: SandboxMode = SandboxMode.ALL): + self.mode = mode + + def should_sandbox(self, *, is_default_session: bool = True) -> bool: + if self.mode == SandboxMode.OFF: + return False + if self.mode == SandboxMode.ALL: + return True + # NON_DEFAULT: sandbox everything except the default session + return not is_default_session + + +# ── Layer 2: Tool Policy ───────────────────────────────────────────── + + +class ToolPolicy: + """Controls which tools are available to the current agent/session. + + Rules: + - ``deny`` always takes precedence over ``allow``. + - An empty ``allow`` list means "all tools allowed" (no allowlist filter). + - ``elevated`` cannot bypass a denied tool. + """ + + def __init__( + self, + allow: Sequence[str] = (), + deny: Sequence[str] = (), + ): + self._allow: frozenset[str] = frozenset(allow) + self._deny: frozenset[str] = frozenset(deny) + + def is_tool_allowed(self, tool_name: str) -> bool: + if tool_name in self._deny: + return False + if self._allow and tool_name not in self._allow: + return False + return True + + +# ── Layer 3: Elevated Policy ───────────────────────────────────────── + + +class ElevatedPolicy: + """Controls whether ``exec`` may request temporary privilege escalation. + + ``elevated`` only applies to the ``exec`` tool. 
It means "run this + command outside the default sandbox boundary" (e.g. with network, or + on the host). The framework decides whether to honor the request. + """ + + def __init__(self, *, allow_elevated: bool = False, require_approval: bool = True): + self.allow_elevated = allow_elevated + self.require_approval = require_approval + + def is_elevation_permitted(self) -> bool: + return self.allow_elevated diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py new file mode 100644 index 000000000..13469634c --- /dev/null +++ b/src/langbot/pkg/box/service.py @@ -0,0 +1,794 @@ +from __future__ import annotations + +import asyncio +import collections +import datetime as _dt +import enum +import json +import os +from typing import TYPE_CHECKING + +import pydantic + +from langbot_plugin.box.client import BoxRuntimeClient +from .connector import BoxRuntimeConnector, _get_box_config +from langbot_plugin.box.errors import BoxError, BoxValidationError +from langbot_plugin.box.models import ( + BUILTIN_PROFILES, + BoxExecutionResult, + BoxManagedProcessInfo, + BoxManagedProcessSpec, + BoxProfile, + BoxSpec, +) + +_INT_ADAPTER = pydantic.TypeAdapter(int) +_UTC = _dt.timezone.utc +_MAX_RECENT_ERRORS = 50 +_MIB = 1024 * 1024 + + +def _is_path_under(path: str, root: str) -> bool: + """Check whether *path* equals *root* or is a child of *root*.""" + return path == root or path.startswith(f'{root}{os.sep}') + + +if TYPE_CHECKING: + from ..core import app as core_app + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + + +class BoxService: + def __init__( + self, + ap: core_app.Application, + client: BoxRuntimeClient | None = None, + output_limit_chars: int = 4000, + ): + self.ap = ap + self._enabled = self._load_enabled() + self._runtime_connector: BoxRuntimeConnector | None = None + if client is None: + # Always construct a connector — its __init__ is side-effect free + # (no I/O, no subprocess). 
When ``box.enabled = false`` we simply + # skip ``connector.initialize()`` so no connection is attempted. + self._runtime_connector = BoxRuntimeConnector(ap, runtime_disconnect_callback=self._on_runtime_disconnect) + client = self._runtime_connector.client + self.client = client + self.output_limit_chars = output_limit_chars + self.host_root = self._load_host_root() + self.allowed_mount_roots = self._load_allowed_mount_roots() + self.default_workspace = self._load_default_workspace() + self.profile = self._load_profile() + self.custom_image = self._load_custom_image() + self.workspace_quota_mb = self._load_workspace_quota_mb() + self._recent_errors: collections.deque[dict] = collections.deque(maxlen=_MAX_RECENT_ERRORS) + self._shutdown_task = None + self._available = False + self._connector_error: str = '' + self._reconnecting = False + + @property + def enabled(self) -> bool: + """Whether Box is enabled in config. False means the operator has + deliberately turned the sandbox off via ``box.enabled = false``. + Disabled and "enabled but unavailable" are reported as the same + ``available = False`` to consumers, but distinguished in get_status.""" + return self._enabled + + async def initialize(self): + self._ensure_default_workspace() + if not self._enabled: + # Disabled by config: do NOT connect to a remote runtime, do NOT + # fork a stdio subprocess. Every consumer of box_service should + # gate on ``available`` and degrade gracefully. + self._available = False + self._connector_error = 'Box runtime is disabled in config (box.enabled = false)' + self.ap.logger.info( + 'Box runtime disabled by config; sandbox features (exec/read/write/edit, ' + 'skill add/edit, stdio MCP) will be unavailable.' 
+ ) + return + try: + if self._runtime_connector is not None: + await self._runtime_connector.initialize() + else: + await self.client.initialize() + self._available = True + self._connector_error = '' + self.ap.logger.info( + f'LangBot Box runtime initialized: profile={self.profile.name} ' + f'default_workspace={self.default_workspace or "(none)"}' + ) + except Exception as exc: + self.ap.logger.warning(f'LangBot Box runtime unavailable, sandbox features disabled: {exc}') + self._available = False + self._connector_error = str(exc) + + async def _on_runtime_disconnect(self, connector: BoxRuntimeConnector) -> None: + """Called by the connector when the Box runtime connection drops. + + Spawns a background reconnection loop so the caller is not blocked. + Skipped entirely when Box is disabled by config — that path should + never have connected in the first place. + """ + if not self._enabled: + return + if self._reconnecting: + return # Another reconnect loop is already running + self._reconnecting = True + self._available = False + self._connector_error = 'Disconnected from Box runtime' + self.ap.logger.warning('Box runtime disconnected, sandbox features temporarily disabled.') + asyncio.create_task(self._reconnect_loop(connector)) + + async def _reconnect_loop(self, connector: BoxRuntimeConnector) -> None: + """Retry reconnection with exponential backoff (3s → 60s max).""" + delay = 3 + max_delay = 60 + try: + while True: + self.ap.logger.info(f'Attempting to reconnect to Box runtime in {delay}s...') + await asyncio.sleep(delay) + try: + connector.dispose() + await connector.initialize() + self._available = True + self._connector_error = '' + self.ap.logger.info('Box runtime reconnected, sandbox features restored.') + return + except Exception as exc: + self._connector_error = str(exc) + self.ap.logger.warning(f'Box runtime reconnection failed: {exc}') + delay = min(delay * 2, max_delay) + finally: + self._reconnecting = False + + @property + def available(self) 
-> bool: + return self._available + + async def execute_spec_payload( + self, + spec_payload: dict, + query: pipeline_query.Query, + *, + skip_host_mount_validation: bool = False, + ) -> dict: + if not self._available: + raise BoxError('Box runtime is not available. Install and start Docker to use sandbox features.') + try: + spec = self.build_spec(spec_payload, skip_host_mount_validation=skip_host_mount_validation) + except BoxError as exc: + self._record_error(exc, query) + raise + self.ap.logger.info( + 'LangBot Box request: ' + f'query_id={query.query_id} ' + f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}' + ) + try: + self._enforce_workspace_quota(spec, phase='before execution') + except BoxError as exc: + self._record_error(exc, query) + raise + try: + result = await self.client.execute(spec) + except BoxError as exc: + self._record_error(exc, query) + raise + try: + self._enforce_workspace_quota(spec, phase='after execution') + except BoxError as exc: + await self._cleanup_exceeded_session(spec) + self._record_error(exc, query) + raise + self.ap.logger.info( + 'LangBot Box result: ' + f'query_id={query.query_id} ' + f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}' + ) + return self._serialize_result(result) + + def resolve_box_session_id(self, query: pipeline_query.Query) -> str: + """Resolve the Box session_id from the pipeline's template and query variables.""" + template = ( + (query.pipeline_config or {}) + .get('ai', {}) + .get('local-agent', {}) + .get('box-session-id-template', '{launcher_type}_{launcher_id}') + ) + variables = dict(query.variables or {}) + launcher_type = getattr(query, 'launcher_type', None) + if hasattr(launcher_type, 'value'): + launcher_type = launcher_type.value + launcher_id = getattr(query, 'launcher_id', None) + sender_id = getattr(query, 'sender_id', None) + query_id = getattr(query, 'query_id', None) + + variables.setdefault('query_id', str(query_id or 'unknown')) + 
variables.setdefault('launcher_type', str(launcher_type or 'query')) + variables.setdefault('launcher_id', str(launcher_id or query_id or 'unknown')) + variables.setdefault('sender_id', str(sender_id or launcher_id or query_id or 'unknown')) + variables.setdefault('global', 'global') + return template.format_map(collections.defaultdict(lambda: 'unknown', variables)) + + def build_skill_extra_mounts(self, query: pipeline_query.Query) -> list[dict]: + """Build extra_mounts entries for all pipeline-bound skills. + + This ensures that when a container is first created it already has + all skill packages mounted, regardless of which skill is currently + activated. + + Skills whose ``package_root`` is missing or no longer a directory on + the LangBot-visible filesystem are skipped with a warning instead of + being passed through to the backend. Without this guard the three + backends behave inconsistently on a stale mount: nsjail refuses to + start the sandbox (failing every exec in the session), Docker + silently auto-creates a root-owned empty directory on the host, and + E2B silently skips the upload — none of which surfaces an + actionable error to the agent or operator. + """ + skill_mgr = getattr(self.ap, 'skill_mgr', None) + if skill_mgr is None: + return [] + + from ..provider.tools.loaders import skill as skill_loader + + visible_skills = skill_loader.get_visible_skills(self.ap, query) + mounts: list[dict] = [] + for skill_name, skill_data in visible_skills.items(): + package_root = str(skill_data.get('package_root', '') or '').strip() + if not package_root: + continue + if not os.path.isdir(package_root): + self.ap.logger.warning( + f'Skill "{skill_name}" package_root missing on filesystem ' + f'({package_root}); skipping mount to prevent sandbox failures. ' + f'The skill cache may be stale — consider reloading skills.' 
+ ) + continue + mounts.append( + { + 'host_path': package_root, + 'mount_path': f'/workspace/.skills/{skill_name}', + 'mode': 'rw', + } + ) + return mounts + + async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict: + """Execute an agent-facing ``exec`` tool call. + + Translates the agent-facing ``command`` field to the internal + ``BoxSpec.cmd`` field and injects the session id from the query. + """ + spec_payload: dict = {'cmd': parameters['command']} + + # Pass through allowed agent-facing fields + for key in ('workdir', 'timeout_sec', 'env'): + if key in parameters: + spec_payload[key] = parameters[key] + + # Inject context the agent must not control + spec_payload.setdefault('session_id', self.resolve_box_session_id(query)) + + # Mount all pipeline-bound skills so they are available in the container + if 'extra_mounts' not in spec_payload: + spec_payload['extra_mounts'] = self.build_skill_extra_mounts(query) + + return await self.execute_spec_payload(spec_payload, query) + + async def shutdown(self): + await self.client.shutdown() + + def dispose(self): + if self._runtime_connector is not None: + self._runtime_connector.dispose() + loop = getattr(self.ap, 'event_loop', None) + if loop is not None and not loop.is_closed() and (self._shutdown_task is None or self._shutdown_task.done()): + self._shutdown_task = loop.create_task(self.shutdown()) + + async def get_sessions(self) -> list[dict]: + if not self._available: + return [] + try: + return await self.client.get_sessions() + except Exception: + return [] + + def build_spec(self, spec_payload: dict, skip_host_mount_validation: bool = False) -> BoxSpec: + spec_payload = dict(spec_payload) + spec_payload.setdefault('env', {}) + if spec_payload.get('host_path') in (None, '') and self.default_workspace is not None: + spec_payload['host_path'] = self.default_workspace + if spec_payload.get('workspace_quota_mb') in (None, '') and self.workspace_quota_mb is not None: + 
spec_payload['workspace_quota_mb'] = self.workspace_quota_mb + + # Global custom image overrides profile default (but not caller-specified image) + if self.custom_image and 'image' not in spec_payload: + spec_payload['image'] = self.custom_image + + self._apply_profile(spec_payload) + + try: + spec = BoxSpec.model_validate(spec_payload) + except pydantic.ValidationError as exc: + first_error = exc.errors()[0] + raise BoxValidationError(first_error.get('msg', 'invalid box arguments')) from exc + + if not skip_host_mount_validation: + self._validate_host_mount(spec) + return spec + + async def create_session(self, spec_payload: dict, *, skip_host_mount_validation: bool = False) -> dict: + spec = self.build_spec(spec_payload, skip_host_mount_validation=skip_host_mount_validation) + return await self.client.create_session(spec) + + async def start_managed_process(self, session_id: str, process_payload: dict) -> BoxManagedProcessInfo: + process_spec = BoxManagedProcessSpec.model_validate(process_payload) + return await self.client.start_managed_process(session_id, process_spec) + + async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo: + return await self.client.get_managed_process(session_id, process_id) + + async def stop_managed_process(self, session_id: str, process_id: str = 'default') -> None: + return await self.client.stop_managed_process(session_id, process_id) + + def get_managed_process_websocket_url(self, session_id: str, process_id: str = 'default') -> str: + getter = getattr(self.client, 'get_managed_process_websocket_url', None) + if getter is None: + raise BoxValidationError('box runtime client does not support managed process websocket attach') + ws_relay_base_url = ( + self._runtime_connector.ws_relay_base_url + if self._runtime_connector is not None + else 'http://127.0.0.1:5410' + ) + return getter(session_id, ws_relay_base_url, process_id) + + async def list_skills(self) -> list[dict]: + return 
await self.client.list_skills() + + async def get_skill(self, name: str) -> dict | None: + return await self.client.get_skill(name) + + async def create_skill(self, skill: dict) -> dict: + return await self.client.create_skill(skill) + + async def update_skill(self, name: str, skill: dict) -> dict: + return await self.client.update_skill(name, skill) + + async def delete_skill(self, name: str) -> None: + await self.client.delete_skill(name) + + async def scan_skill_directory(self, path: str) -> dict: + return await self.client.scan_skill_directory(path) + + async def list_skill_files( + self, + name: str, + path: str = '.', + include_hidden: bool = False, + max_entries: int = 200, + ) -> dict: + return await self.client.list_skill_files(name, path, include_hidden, max_entries) + + async def read_skill_file(self, name: str, path: str) -> dict: + return await self.client.read_skill_file(name, path) + + async def write_skill_file(self, name: str, path: str, content: str) -> dict: + return await self.client.write_skill_file(name, path, content) + + async def preview_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: + return await self.client.preview_skill_zip(file_bytes, filename, source_subdir, target_suffix) + + async def install_skill_zip( + self, + file_bytes: bytes, + filename: str, + source_paths: list[str] | None = None, + source_path: str = '', + source_subdir: str = '', + target_suffix: str = 'upload', + ) -> list[dict]: + return await self.client.install_skill_zip( + file_bytes, + filename, + source_paths, + source_path, + source_subdir, + target_suffix, + ) + + def _serialize_result(self, result: BoxExecutionResult) -> dict: + stdout, stdout_truncated = self._truncate(result.stdout) + stderr, stderr_truncated = self._truncate(result.stderr) + + return { + 'session_id': result.session_id, + 'backend': result.backend_name, + 'status': result.status.value, + 'ok': result.ok, + 
'exit_code': result.exit_code, + 'stdout': stdout, + 'stderr': stderr, + 'stdout_truncated': stdout_truncated, + 'stderr_truncated': stderr_truncated, + 'duration_ms': result.duration_ms, + } + + def _truncate(self, text: str) -> tuple[str, bool]: + if len(text) <= self.output_limit_chars: + return text, False + if self.output_limit_chars <= 0: + return '', True + + head_size = 0 + tail_size = 0 + notice = '' + # Recompute once the omitted count is known so the final payload + # stays within output_limit_chars even after adding the notice. + for _ in range(4): + omitted = max(len(text) - head_size - tail_size, 0) + notice = f'\n\n... [{omitted} characters truncated] ...\n\n' + available = self.output_limit_chars - len(notice) + if available <= 0: + return notice[: self.output_limit_chars], True + + new_head_size = int(available * 0.6) + new_tail_size = available - new_head_size + if new_head_size == head_size and new_tail_size == tail_size: + break + head_size = new_head_size + tail_size = new_tail_size + + head = text[:head_size] + tail = text[-tail_size:] if tail_size else '' + truncated = f'{head}{notice}{tail}' + return truncated[: self.output_limit_chars], True + + def _summarize_spec(self, spec: BoxSpec) -> dict: + cmd = spec.cmd.strip() + if len(cmd) > 400: + cmd = f'{cmd[:397]}...' 
+ + return { + 'session_id': spec.session_id, + 'workdir': spec.workdir, + 'mount_path': spec.mount_path, + 'timeout_sec': spec.timeout_sec, + 'network': spec.network.value, + 'image': spec.image, + 'host_path': spec.host_path, + 'host_path_mode': spec.host_path_mode.value, + 'cpus': spec.cpus, + 'memory_mb': spec.memory_mb, + 'pids_limit': spec.pids_limit, + 'read_only_rootfs': spec.read_only_rootfs, + 'workspace_quota_mb': spec.workspace_quota_mb, + 'env_keys': sorted(spec.env.keys()), + 'cmd': cmd, + } + + def _summarize_result(self, result: BoxExecutionResult) -> dict: + stdout_preview = result.stdout[:200] + stderr_preview = result.stderr[:200] + if len(result.stdout) > 200: + stdout_preview = f'{stdout_preview}...' + if len(result.stderr) > 200: + stderr_preview = f'{stderr_preview}...' + + return { + 'session_id': result.session_id, + 'backend': result.backend_name, + 'status': result.status.value, + 'exit_code': result.exit_code, + 'duration_ms': result.duration_ms, + 'stdout_preview': stdout_preview, + 'stderr_preview': stderr_preview, + } + + def _local_config(self) -> dict: + """Return ``box.local`` from instance config. + + Environment overrides are applied uniformly by + ``LoadConfigStage._apply_env_overrides_to_config`` (e.g. + ``BOX__LOCAL__HOST_ROOT``) before this is read, so no box-specific + env parsing happens here. + """ + return dict(_get_box_config(self.ap).get('local') or {}) + + def _load_allowed_mount_roots(self) -> list[str]: + configured_roots = self._local_config().get('allowed_mount_roots', []) + # The unified env-override mechanism stores a brand-new key as a raw + # string when the key is absent from config.yaml. Accept a + # comma-separated string as well as a list so that + # ``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"`` keeps working even when + # the config file has no ``box.local.allowed_mount_roots`` entry. 
+ if isinstance(configured_roots, str): + configured_roots = [item.strip() for item in configured_roots.split(',') if item.strip()] + + normalized_roots: list[str] = [] + for root in configured_roots: + root_value = str(root).strip() + if not root_value: + continue + normalized_roots.append(os.path.realpath(os.path.abspath(root_value))) + + if not normalized_roots and self.host_root is not None: + normalized_roots.append(self.host_root) + + return normalized_roots + + def _load_host_root(self) -> str | None: + host_root = str(self._local_config().get('host_root', '')).strip() + if not host_root: + return None + return os.path.realpath(os.path.abspath(host_root)) + + def _load_default_workspace(self) -> str | None: + default_workspace = str(self._local_config().get('default_workspace', '')).strip() + if not default_workspace: + if self.host_root is None: + return None + default_workspace = os.path.join(self.host_root, 'default') + elif not os.path.isabs(default_workspace) and self.host_root is not None: + default_workspace = os.path.join(self.host_root, default_workspace) + return os.path.realpath(os.path.abspath(default_workspace)) + + def get_skills_root(self) -> str | None: + skills_root = str(self._local_config().get('skills_root', '') or 'skills').strip() + if not skills_root: + skills_root = 'skills' + if not os.path.isabs(skills_root) and self.host_root is not None: + skills_root = os.path.join(self.host_root, skills_root) + return os.path.realpath(os.path.abspath(skills_root)) + + def _load_enabled(self) -> bool: + """Read ``box.enabled`` (top-level, not ``box.local.*``). Default True + — disabling is opt-in. 
Accepts bool, ``'true'``/``'false'`` strings, + and the standard env-overridden truthy values that + ``LoadConfigStage._apply_env_overrides_to_config`` produces.""" + raw = _get_box_config(self.ap).get('enabled', True) + if isinstance(raw, bool): + return raw + return str(raw).strip().lower() not in ('false', '0', 'no', 'off', '') + + def _load_custom_image(self) -> str | None: + raw = str(self._local_config().get('image', '') or '').strip() + return raw or None + + def _load_workspace_quota_mb(self) -> int | None: + raw_value = self._local_config().get('workspace_quota_mb') + if raw_value in (None, ''): + return None + try: + value = _INT_ADAPTER.validate_python(raw_value) + except pydantic.ValidationError as exc: + raise BoxValidationError('workspace_quota_mb must be an integer greater than or equal to 0') from exc + if value < 0: + raise BoxValidationError('workspace_quota_mb must be greater than or equal to 0') + return value + + def _ensure_default_workspace(self): + if self.default_workspace is None: + return + + if os.path.isdir(self.default_workspace): + return + + if os.path.exists(self.default_workspace): + raise BoxValidationError('box.local.default_workspace must point to a directory on the host') + + if not self.allowed_mount_roots: + raise BoxValidationError( + 'box.local.default_workspace cannot be created because no allowed_mount_roots are configured' + ) + + for allowed_root in self.allowed_mount_roots: + if _is_path_under(self.default_workspace, allowed_root): + os.makedirs(self.default_workspace, exist_ok=True) + return + + allowed_roots = ', '.join(self.allowed_mount_roots) + raise BoxValidationError(f'box.local.default_workspace is outside allowed_mount_roots: {allowed_roots}') + + def _validate_host_mount(self, spec: BoxSpec): + if spec.host_path is None: + return + + host_path = os.path.realpath(spec.host_path) + if not os.path.isdir(host_path): + raise BoxValidationError('host_path must point to an existing directory on the host') + + if not 
self.allowed_mount_roots: + raise BoxValidationError('host_path mounting is disabled because no allowed_mount_roots are configured') + + for allowed_root in self.allowed_mount_roots: + if _is_path_under(host_path, allowed_root): + return + + allowed_roots = ', '.join(self.allowed_mount_roots) + raise BoxValidationError(f'host_path is outside allowed_mount_roots: {allowed_roots}') + + def _load_profile(self) -> BoxProfile: + profile_name = str(self._local_config().get('profile', 'default')).strip() or 'default' + + profile = BUILTIN_PROFILES.get(profile_name) + if profile is None: + available = ', '.join(sorted(BUILTIN_PROFILES)) + raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}") + return profile + + def _apply_profile(self, params: dict): + """Merge profile defaults into *params* in-place, enforce locked fields and clamp timeout.""" + profile = self.profile + _PROFILE_FIELDS = ( + 'image', + 'network', + 'timeout_sec', + 'host_path_mode', + 'cpus', + 'memory_mb', + 'pids_limit', + 'read_only_rootfs', + 'workspace_quota_mb', + ) + + for field in _PROFILE_FIELDS: + profile_value = getattr(profile, field) + raw_value = profile_value.value if isinstance(profile_value, enum.Enum) else profile_value + + if field in profile.locked: + params[field] = raw_value + elif field not in params: + params[field] = raw_value + + timeout = params.get('timeout_sec') + try: + normalized_timeout = _INT_ADAPTER.validate_python(timeout) + except pydantic.ValidationError: + return + + if normalized_timeout > profile.max_timeout_sec: + params['timeout_sec'] = profile.max_timeout_sec + + def _get_workspace_size_bytes(self, root: str) -> int: + total = 0 + + def _walk(path: str): + nonlocal total + try: + with os.scandir(path) as entries: + for entry in entries: + try: + if entry.is_symlink(): + total += entry.stat(follow_symlinks=False).st_size + continue + if entry.is_dir(follow_symlinks=False): + _walk(entry.path) + continue + total += 
entry.stat(follow_symlinks=False).st_size + except FileNotFoundError: + continue + except FileNotFoundError: + return + + _walk(root) + return total + + def _enforce_workspace_quota(self, spec: BoxSpec, *, phase: str) -> None: + if spec.host_path is None or spec.workspace_quota_mb <= 0: + return + + host_path = os.path.realpath(spec.host_path) + if not os.path.isdir(host_path): + return + + used_bytes = self._get_workspace_size_bytes(host_path) + limit_bytes = spec.workspace_quota_mb * _MIB + if used_bytes <= limit_bytes: + return + + raise BoxValidationError( + f'workspace quota exceeded {phase}: ' + f'used={used_bytes} bytes limit={limit_bytes} bytes ' + f'host_path={host_path} session_id={spec.session_id}' + ) + + async def _cleanup_exceeded_session(self, spec: BoxSpec) -> None: + try: + await self.client.delete_session(spec.session_id) + except Exception as exc: + self.ap.logger.warning( + 'Failed to clean up Box session after workspace quota was exceeded: ' + f'session_id={spec.session_id} error={exc}' + ) + + # ── Observability ───────────────────────────────────────────────── + + def _record_error(self, exc: Exception, query: pipeline_query.Query): + self._recent_errors.append( + { + 'timestamp': _dt.datetime.now(_UTC).isoformat(), + 'type': type(exc).__name__, + 'message': str(exc), + 'query_id': str(query.query_id), + } + ) + + def get_recent_errors(self) -> list[dict]: + return list(self._recent_errors) + + def get_system_guidance(self) -> str: + """Return LLM system-prompt guidance for the exec tool. + + All execution-specific prompt text is kept here so that callers + (e.g. LocalAgentRunner) stay free of box domain knowledge. + """ + guidance = ( + 'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, ' + 'and code execution instead of estimating mentally. 
If the user provides numbers, tables, CSV-like text, ' + 'JSON, or other data and asks for a computed answer, prefer running a short Python script via exec ' + 'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation ' + 'details, do not include the generated script in the final answer; return the result and a brief explanation only.' + ) + if self.default_workspace: + guidance += ( + ' A default workspace is mounted at /workspace for file tasks. When the user asks to read, create, or ' + 'modify local files in the working directory, use exec with /workspace paths directly; do not ask the ' + 'user for directory parameters unless they explicitly need a different directory.' + ) + return guidance + + async def get_status(self) -> dict: + if not self._available: + return { + 'available': False, + 'enabled': self._enabled, + 'profile': self.profile.name, + 'recent_error_count': len(self._recent_errors), + 'connector_error': self._connector_error, + } + try: + runtime_status = await self.client.get_status() + except Exception as exc: + # RPC failed — the runtime likely just disconnected and the + # heartbeat hasn't flipped _available yet. + return { + 'available': False, + 'enabled': self._enabled, + 'profile': self.profile.name, + 'recent_error_count': len(self._recent_errors), + 'connector_error': str(exc), + } + # Backend state can be unavailable even when the connector is healthy + # (operator selected nsjail but the binary is missing, Docker daemon + # went down after the runtime started, E2B credentials wrong, ...). + # Report the combined state in the top-level ``available`` so the + # frontend banner / ``useBoxStatus`` hook / native-tool gate all + # agree on "actually usable" rather than "connector alive". The + # detailed ``backend`` object stays in the payload so the dialog + # can still show which backend was tried. 
+ backend_info = runtime_status.get('backend') if isinstance(runtime_status, dict) else None + backend_ok = bool(backend_info and backend_info.get('available', False)) + payload = { + **runtime_status, + 'available': backend_ok, + 'enabled': self._enabled, + 'profile': self.profile.name, + 'recent_error_count': len(self._recent_errors), + } + if not backend_ok and 'connector_error' not in payload: + backend_name = backend_info.get('name') if backend_info else None + if backend_name: + payload['connector_error'] = f'Configured sandbox backend "{backend_name}" is unavailable' + else: + payload['connector_error'] = 'No supported sandbox backend (Docker / nsjail / E2B) is available' + return payload diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py new file mode 100644 index 000000000..948622efb --- /dev/null +++ b/src/langbot/pkg/box/workspace.py @@ -0,0 +1,413 @@ +"""Reusable workspace/session helpers built on top of Box. + +This module is the middle layer between the raw Box runtime primitives and +application-specific flows such as skills or MCP stdio. 
+ +It intentionally stays generic: +- path and virtualenv rewriting are workspace concerns +- Python project detection/bootstrap are workspace concerns +- session exec / managed-process helpers are workspace concerns + +Higher layers add their own semantics on top, for example: +- skills choose a stable per-skill session id and use repeated exec +- MCP stdio chooses how to prepare dependencies and attaches to a managed process +""" + +from __future__ import annotations + +import os +import textwrap +from typing import Any + +PYTHON_MANIFEST_FILES = ( + 'requirements.txt', + 'pyproject.toml', + 'setup.py', + 'setup.cfg', +) +_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'}) +_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'}) + + +def normalize_host_path(path: str | None) -> str: + if path is None: + return '' + stripped = str(path).strip() + if not stripped: + return '' + return os.path.realpath(os.path.abspath(stripped)) + + +def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str: + """Translate a host path into the path visible inside the sandbox mount.""" + if not host_path or not path: + return path + normalized_host = os.path.realpath(host_path) + normalized_path = os.path.realpath(path) + if normalized_path.startswith(normalized_host + '/'): + return mount_path + normalized_path[len(normalized_host) :] + if normalized_path == normalized_host: + return mount_path + return path + + +def unwrap_venv_path(directory: str) -> str: + """Collapse ``.../.venv/bin`` style paths back to the project root.""" + parts = directory.replace('\\', '/').split('/') + for i in range(len(parts) - 1, 0, -1): + if parts[i] in _VENV_BIN_DIRS and i >= 1: + venv_dir = parts[i - 1] + if venv_dir in _VENV_DIRS: + project_root = '/'.join(parts[: i - 1]) + return project_root if project_root else '/' + return directory + + +def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None: + """Infer the 
project/workspace root from absolute command/arg paths.""" + candidates: list[str] = [] + for part in [command, *(args or [])]: + if not os.path.isabs(part): + continue + if os.path.exists(part): + directory = os.path.dirname(part) + candidates.append(os.path.realpath(unwrap_venv_path(directory))) + if not candidates: + return None + common = os.path.commonpath(candidates) + return common if common != '/' else None + + +def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str: + """Rewrite host venv interpreters to plain ``python`` inside the sandbox. + + Once a project is mounted into the sandbox, host virtualenv paths are no + longer valid. For those paths we intentionally drop down to ``python`` and + let the sandbox-side environment/bootstrap decide what interpreter to use. + """ + if not host_path or not command: + return command + normalized_host = os.path.realpath(host_path) + normalized_command = os.path.realpath(command) + if not normalized_command.startswith(normalized_host + '/'): + return command + rel = normalized_command[len(normalized_host) + 1 :] + parts = rel.replace('\\', '/').split('/') + if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'): + return 'python' + return rewrite_mounted_path(normalized_command, host_path, mount_path=mount_path) + + +def list_python_manifest_files(host_path: str | None) -> list[str]: + normalized_root = normalize_host_path(host_path) + if not normalized_root: + return [] + return [filename for filename in PYTHON_MANIFEST_FILES if os.path.isfile(os.path.join(normalized_root, filename))] + + +def classify_python_workspace(host_path: str | None) -> str | None: + """Return the generic Python workspace shape, without app-specific policy.""" + manifest_files = set(list_python_manifest_files(host_path)) + if not manifest_files: + return None + if {'pyproject.toml', 'setup.py', 'setup.cfg'} & manifest_files: + return 
'package' + if 'requirements.txt' in manifest_files: + return 'requirements' + return None + + +def should_prepare_python_env(host_path: str | None) -> bool: + normalized_root = normalize_host_path(host_path) + if not normalized_root: + return False + if os.path.isdir(os.path.join(normalized_root, '.venv')): + return True + return bool(list_python_manifest_files(normalized_root)) + + +def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str: + """Wrap a command with a reusable sandbox-local Python env bootstrap. + + This is the generic "workspace is a Python project" path used by mutable + workspaces such as skills. Read-only installation strategies stay in the + higher-level caller because they are application policy, not workspace + semantics. + """ + bootstrap = textwrap.dedent( + f""" + set -e + + _LB_VENV_DIR="{mount_path}/.venv" + _LB_META_DIR="{mount_path}/.langbot" + _LB_META_FILE="$_LB_META_DIR/python-env.json" + _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock" + _LB_TMP_DIR="{mount_path}/.tmp" + _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip" + + mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR" + export TMPDIR="$_LB_TMP_DIR" + export TEMP="$_LB_TMP_DIR" + export TMP="$_LB_TMP_DIR" + export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR" + + _lb_python_meta() {{ + python - <<'PY' + import hashlib + import json + import os + import sys + + root = "{mount_path}" + digest = hashlib.sha256() + manifest_files = [] + for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"): + path = os.path.join(root, rel) + if not os.path.isfile(path): + continue + manifest_files.append(rel) + with open(path, "rb") as handle: + digest.update(rel.encode("utf-8")) + digest.update(b"\\0") + digest.update(handle.read()) + digest.update(b"\\0") + + print( + json.dumps( + {{ + "python_executable": sys.executable, + "python_version": list(sys.version_info[:3]), + "manifest_files": manifest_files, + "manifest_sha256": digest.hexdigest(), + }}, + 
sort_keys=True, + ) + ) + PY + }} + + _LB_CURRENT_META="$(_lb_python_meta)" + _LB_NEEDS_BOOTSTRAP=0 + + if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ ! -f "$_LB_META_FILE" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then + _LB_NEEDS_BOOTSTRAP=1 + fi + + if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then + _LB_LOCK_WAIT=0 + while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do + if [ "$_LB_LOCK_WAIT" -ge 120 ]; then + echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2 + exit 1 + fi + sleep 1 + _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1)) + done + + _lb_cleanup_lock() {{ + rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true + }} + trap _lb_cleanup_lock EXIT INT TERM + + _LB_CURRENT_META="$(_lb_python_meta)" + _LB_NEEDS_BOOTSTRAP=0 + if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ ! -f "$_LB_META_FILE" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then + _LB_NEEDS_BOOTSTRAP=1 + fi + + if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then + rm -rf "$_LB_VENV_DIR" + python -m venv "$_LB_VENV_DIR" + . "$_LB_VENV_DIR/bin/activate" + python -m pip install --upgrade pip setuptools wheel + if [ -f "{mount_path}/requirements.txt" ]; then + python -m pip install -r "{mount_path}/requirements.txt" + elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then + python -m pip install "{mount_path}" + fi + printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE" + fi + fi + + export VIRTUAL_ENV="$_LB_VENV_DIR" + export PATH="$_LB_VENV_DIR/bin:$PATH" + {command} + """ + ).strip() + return bootstrap + '\n' + + +class BoxWorkspaceSession: + """High-level handle for one reusable workspace-backed Box session. + + The Box runtime already understands sessions and managed processes. 
This + wrapper adds LangBot's workspace-centric view on top: a mounted host path, + a stable ``session_id``, optional environment defaults, and convenience + helpers for exec or long-running processes inside that workspace. + """ + + def __init__( + self, + box_service, + session_id: str, + *, + host_path: str | None = None, + host_path_mode: str = 'rw', + workdir: str = '/workspace', + env: dict[str, str] | None = None, + mount_path: str = '/workspace', + network: str | None = None, + read_only_rootfs: bool | None = None, + image: str | None = None, + cpus: float | None = None, + memory_mb: int | None = None, + pids_limit: int | None = None, + persistent: bool = False, + ): + self.box_service = box_service + self.session_id = session_id + self.host_path = host_path + self.host_path_mode = host_path_mode + self.workdir = workdir + self.env = dict(env or {}) + self.mount_path = mount_path + self.network = network + self.read_only_rootfs = read_only_rootfs + self.image = image + self.cpus = cpus + self.memory_mb = memory_mb + self.pids_limit = pids_limit + self.persistent = persistent + + def rewrite_path(self, path: str) -> str: + return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path) + + def rewrite_venv_command(self, command: str) -> str: + return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path) + + def build_session_payload(self) -> dict[str, Any]: + # Keep this payload generic so callers can reuse the same workspace + # handle for plain exec, file-producing tasks, or managed processes. 
+ payload: dict[str, Any] = { + 'session_id': self.session_id, + 'workdir': self.workdir, + 'env': self.env, + 'persistent': self.persistent, + } + if self.network is not None: + payload['network'] = self.network + if self.read_only_rootfs is not None: + payload['read_only_rootfs'] = self.read_only_rootfs + if self.host_path: + payload['host_path'] = self.host_path + payload['host_path_mode'] = self.host_path_mode + for key in ('image', 'cpus', 'memory_mb', 'pids_limit'): + value = getattr(self, key) + if value is not None: + payload[key] = value + return payload + + def build_exec_payload( + self, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ) -> dict[str, Any]: + # Exec payloads inherit the session-level workspace config, then layer + # per-call command/workdir/env overrides on top. + payload = self.build_session_payload() + payload['cmd'] = cmd + payload['workdir'] = workdir or self.workdir + if timeout_sec is not None: + payload['timeout_sec'] = timeout_sec + resolved_env = self.env if env is None else env + if resolved_env: + payload['env'] = resolved_env + elif 'env' in payload and not payload['env']: + payload.pop('env') + return payload + + async def execute_raw( + self, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ): + payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec) + return await self.box_service.client.execute(self.box_service.build_spec(payload)) + + async def execute_for_query( + self, + query, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ) -> dict: + payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec) + return await self.box_service.execute_spec_payload(payload, query) + + async def create_session(self): + return await 
self.box_service.create_session(self.build_session_payload()) + + def build_process_payload( + self, + command: str, + args: list[str] | None = None, + *, + env: dict[str, str] | None = None, + cwd: str = '/workspace', + ) -> dict[str, Any]: + # Managed processes run inside the same workspace model as one-shot + # execs, so path/venv rewriting is shared here. + normalized_command = command + normalized_args = list(args or []) + normalized_cwd = cwd + if self.host_path: + normalized_command = self.rewrite_venv_command(command) + normalized_args = [self.rewrite_path(arg) for arg in normalized_args] + normalized_cwd = self.rewrite_path(cwd) + return { + 'command': normalized_command, + 'args': normalized_args, + 'env': dict(env or {}), + 'cwd': normalized_cwd, + } + + async def start_managed_process( + self, + command: str, + args: list[str] | None = None, + *, + process_id: str = 'default', + env: dict[str, str] | None = None, + cwd: str = '/workspace', + ): + payload = self.build_process_payload(command, args, env=env, cwd=cwd) + payload['process_id'] = process_id + return await self.box_service.start_managed_process(self.session_id, payload) + + async def get_managed_process(self, process_id: str = 'default'): + return await self.box_service.get_managed_process(self.session_id, process_id) + + async def stop_managed_process(self, process_id: str = 'default') -> None: + await self.box_service.stop_managed_process(self.session_id, process_id) + + def get_managed_process_websocket_url(self, process_id: str = 'default') -> str: + return self.box_service.get_managed_process_websocket_url(self.session_id, process_id) + + async def cleanup(self) -> None: + await self.box_service.client.delete_session(self.session_id) diff --git a/src/langbot/pkg/core/app.py b/src/langbot/pkg/core/app.py index 7e5386cf5..6e91c2b0b 100644 --- a/src/langbot/pkg/core/app.py +++ b/src/langbot/pkg/core/app.py @@ -9,6 +9,7 @@ from ..platform.webhook_pusher import WebhookPusher from 
..provider.session import sessionmgr as llm_session_mgr from ..provider.modelmgr import modelmgr as llm_model_mgr +from ..box import service as box_service_module from langbot.pkg.provider.tools import toolmgr as llm_tool_mgr from ..config import manager as config_mgr @@ -31,8 +32,8 @@ from ..api.http.service import apikey as apikey_service from ..api.http.service import webhook as webhook_service from ..api.http.service import monitoring as monitoring_service +from ..api.http.service import skill as skill_service from ..api.http.service import maintenance as maintenance_service - from ..discover import engine as discover_engine from ..storage import mgr as storagemgr from ..utils import logcache @@ -43,6 +44,7 @@ from ..vector import mgr as vectordb_mgr from ..telemetry import telemetry as telemetry_module from ..survey import manager as survey_module +from ..skill import manager as skill_mgr class Application: @@ -70,6 +72,7 @@ class Application: # TODO move to pipeline tool_mgr: llm_tool_mgr.ToolManager = None + box_service: box_service_module.BoxService = None # ======= Config manager ======= @@ -156,6 +159,10 @@ class Application: monitoring_service: monitoring_service.MonitoringService = None + skill_service: skill_service.SkillService = None + + skill_mgr: skill_mgr.SkillManager = None + maintenance_service: maintenance_service.MaintenanceService = None def __init__(self): @@ -301,7 +308,10 @@ def _get_positive_float_config(self, value, default: float, name: str) -> float: return parsed def dispose(self): - self.plugin_connector.dispose() + if self.plugin_connector is not None: + self.plugin_connector.dispose() + if self.box_service is not None: + self.box_service.dispose() async def print_web_access_info(self): """Print access webui tips""" diff --git a/src/langbot/pkg/core/boot.py b/src/langbot/pkg/core/boot.py index f866376bf..952a71685 100644 --- a/src/langbot/pkg/core/boot.py +++ b/src/langbot/pkg/core/boot.py @@ -62,4 +62,6 @@ def signal_handler(sig, 
frame): app_inst = await make_app(loop) await app_inst.run() except Exception: + if app_inst is not None: + app_inst.dispose() traceback.print_exc() diff --git a/src/langbot/pkg/core/stages/build_app.py b/src/langbot/pkg/core/stages/build_app.py index 3bb5ffd7a..a8d53b7b3 100644 --- a/src/langbot/pkg/core/stages/build_app.py +++ b/src/langbot/pkg/core/stages/build_app.py @@ -6,6 +6,7 @@ from ...utils import version, proxy from ...pipeline import pool, controller, pipelinemgr from ...pipeline import aggregator as message_aggregator +from ...box import service as box_service from ...plugin import connector as plugin_connector from ...command import cmdmgr from ...provider.session import sessionmgr as llm_session_mgr @@ -28,6 +29,8 @@ from ...api.http.service import apikey as apikey_service from ...api.http.service import webhook as webhook_service from ...api.http.service import monitoring as monitoring_service +from ...api.http.service import skill as skill_service +from ...skill import manager as skill_mgr from ...api.http.service import maintenance as maintenance_service from ...discover import engine as discover_engine from ...storage import mgr as storagemgr @@ -86,6 +89,9 @@ async def run(self, ap: app.Application): webhook_service_inst = webhook_service.WebhookService(ap) ap.webhook_service = webhook_service_inst + skill_service_inst = skill_service.SkillService(ap) + ap.skill_service = skill_service_inst + proxy_mgr = proxy.ProxyManager(ap) await proxy_mgr.initialize() ap.proxy_mgr = proxy_mgr @@ -129,6 +135,10 @@ async def run(self, ap: app.Application): await llm_session_mgr_inst.initialize() ap.sess_mgr = llm_session_mgr_inst + box_service_inst = box_service.BoxService(ap) + await box_service_inst.initialize() + ap.box_service = box_service_inst + llm_tool_mgr_inst = llm_tool_mgr.ToolManager(ap) await llm_tool_mgr_inst.initialize() ap.tool_mgr = llm_tool_mgr_inst @@ -149,6 +159,11 @@ async def run(self, ap: app.Application): msg_aggregator_inst = 
message_aggregator.MessageAggregator(ap) ap.msg_aggregator = msg_aggregator_inst + # Initialize skill manager + skill_mgr_inst = skill_mgr.SkillManager(ap) + await skill_mgr_inst.initialize() + ap.skill_mgr = skill_mgr_inst + rag_mgr_inst = rag_mgr.RAGManager(ap) await rag_mgr_inst.initialize() ap.rag_mgr = rag_mgr_inst diff --git a/src/langbot/pkg/pipeline/preproc/preproc.py b/src/langbot/pkg/pipeline/preproc/preproc.py index 83ddce893..8aa157506 100644 --- a/src/langbot/pkg/pipeline/preproc/preproc.py +++ b/src/langbot/pkg/pipeline/preproc/preproc.py @@ -32,6 +32,9 @@ async def process( ) -> entities.StageProcessResult: """Process""" selected_runner = query.pipeline_config['ai']['runner']['runner'] + include_skill_authoring = ( + selected_runner == 'local-agent' and getattr(self.ap, 'skill_service', None) is not None + ) session = await self.ap.sess_mgr.get_session(query) @@ -110,7 +113,11 @@ async def process( # Get bound plugins and MCP servers for filtering tools bound_plugins = query.variables.get('_pipeline_bound_plugins', None) bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None) - query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers) + query.use_funcs = await self.ap.tool_mgr.get_all_tools( + bound_plugins, + bound_mcp_servers, + include_skill_authoring=include_skill_authoring, + ) self.ap.logger.debug(f'Bound plugins: {bound_plugins}') self.ap.logger.debug(f'Bound MCP servers: {bound_mcp_servers}') @@ -121,7 +128,11 @@ async def process( if not query.use_funcs and query.variables.get('_fallback_model_uuids'): bound_plugins = query.variables.get('_pipeline_bound_plugins', None) bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None) - query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers) + query.use_funcs = await self.ap.tool_mgr.get_all_tools( + bound_plugins, + bound_mcp_servers, + include_skill_authoring=include_skill_authoring, + ) 
sender_name = '' @@ -237,4 +248,67 @@ async def process( query.prompt.messages = event_ctx.event.default_prompt query.messages = event_ctx.event.prompt + # =========== Skill awareness for the local-agent runner =========== + # The actual activation goes through the ``activate`` Tool Call so the + # LLM doesn't see full SKILL.md instructions until it commits to a + # skill (Claude Code's progressive disclosure). But the LLM still has + # to KNOW which skills exist to make that choice, so we: + # 1. resolve the pipeline's bound skills and stash them in + # ``query.variables['_pipeline_bound_skills']`` for downstream + # visibility checks (skill loader, native exec workdir); + # 2. inject a short ``Available Skills`` index (name + description + # only) into the system prompt. The contributor's original PR + # relied on this injection; without it the LLM never discovers + # the skills are there and just calls native tools instead. + if selected_runner == 'local-agent' and self.ap.skill_mgr: + pipeline_data = await self.ap.pipeline_service.get_pipeline(query.pipeline_uuid) + extensions_prefs = (pipeline_data or {}).get('extensions_preferences', {}) + enable_all_skills = extensions_prefs.get('enable_all_skills', True) + + if enable_all_skills: + bound_skills = None # None = all loaded skills are visible + else: + bound_skills = extensions_prefs.get('skills', []) + + query.variables['_pipeline_bound_skills'] = bound_skills + + skill_addition = self.ap.skill_mgr.build_skill_aware_prompt_addition( + bound_skills=bound_skills, + ) + if skill_addition: + # Append to the first system message; create one if the + # prompt has none. Handles both plain-string and + # content-element (list) message bodies. 
+ if query.prompt.messages and query.prompt.messages[0].role == 'system': + head = query.prompt.messages[0] + if isinstance(head.content, str): + head.content = head.content + skill_addition + elif isinstance(head.content, list): + appended = False + for ce in head.content: + if getattr(ce, 'type', None) == 'text': + ce.text = (ce.text or '') + skill_addition + appended = True + break + if not appended: + head.content.append(provider_message.ContentElement(type='text', text=skill_addition)) + else: + query.prompt.messages.insert( + 0, + provider_message.Message(role='system', content=skill_addition.strip()), + ) + self.ap.logger.debug( + f'Skill index injected into system prompt: ' + f'pipeline={query.pipeline_uuid} ' + f'bound_skills={bound_skills or "all"} ' + f'loaded_skills={len(self.ap.skill_mgr.skills)}' + ) + else: + self.ap.logger.debug( + f'No skills available for prompt injection: ' + f'pipeline={query.pipeline_uuid} ' + f'loaded_skills={len(self.ap.skill_mgr.skills)} ' + f'bound_skills={bound_skills}' + ) + return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) diff --git a/src/langbot/pkg/pipeline/process/handler.py b/src/langbot/pkg/pipeline/process/handler.py index b70a8e043..989cb0b01 100644 --- a/src/langbot/pkg/pipeline/process/handler.py +++ b/src/langbot/pkg/pipeline/process/handler.py @@ -5,6 +5,7 @@ from ...core import app from .. import entities import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message class MessageHandler(metaclass=abc.ABCMeta): @@ -31,3 +32,29 @@ def cut_str(self, s: str) -> str: if len(s0) > 20 or '\n' in s: s0 = s0[:20] + '...' 
return s0 + + def format_result_log( + self, + result: provider_message.Message | provider_message.MessageChunk, + ) -> str | None: + if result.tool_calls: + tool_names = [tc.function.name for tc in result.tool_calls if tc.function and tc.function.name] + if tool_names: + return f'{result.role}: requested tools: {", ".join(tool_names)}' + return f'{result.role}: requested tool calls' + + content = result.content + if isinstance(content, str): + if not content.strip(): + return None + + if result.role == 'tool': + if content.startswith('err:'): + return f'tool error: {self.cut_str(content)}' + + return self.cut_str(result.readable_str()) + + if isinstance(content, list) and len(content) == 0: + return None + + return self.cut_str(result.readable_str()) diff --git a/src/langbot/pkg/pipeline/process/handlers/chat.py b/src/langbot/pkg/pipeline/process/handlers/chat.py index 87f8d8ce4..c81461fdb 100644 --- a/src/langbot/pkg/pipeline/process/handlers/chat.py +++ b/src/langbot/pkg/pipeline/process/handlers/chat.py @@ -113,9 +113,11 @@ async def handle( # This prevents memory overflow from thousands of log entries per conversation # First chunk uses INFO level to confirm connection establishment if chunk_count == 1: - self.ap.logger.info( - f'Conversation({query.query_id}) Streaming started: {self.cut_str(result.readable_str())}' - ) + summary = self.format_result_log(result) + if summary is not None: + self.ap.logger.info(f'Conversation({query.query_id}) Streaming started: {summary}') + else: + self.ap.logger.info(f'Conversation({query.query_id}) Streaming started') elif chunk_count % 10 == 0: self.ap.logger.debug( f'Conversation({query.query_id}) Streaming chunk {chunk_count}: {self.cut_str(result.readable_str())}' @@ -135,9 +137,9 @@ async def handle( async for result in runner.run(query): query.resp_messages.append(result) - self.ap.logger.info( - f'Conversation({query.query_id}) Response: {self.cut_str(result.readable_str())}' - ) + summary = 
self.format_result_log(result) + if summary is not None: + self.ap.logger.info(f'Conversation({query.query_id}) Response: {summary}') if result.content is not None: text_length += len(result.content) diff --git a/src/langbot/pkg/plugin/connector.py b/src/langbot/pkg/plugin/connector.py index 9e1b0ea8a..70c2591e0 100644 --- a/src/langbot/pkg/plugin/connector.py +++ b/src/langbot/pkg/plugin/connector.py @@ -18,6 +18,7 @@ from ..core import app from . import handler from ..utils import platform +from ..utils.managed_runtime import ManagedRuntimeConnector from langbot_plugin.runtime.io.controllers.stdio import ( client as stdio_client_controller, ) @@ -39,11 +40,9 @@ class PluginRuntimeNotConnectedError(RuntimeError): """Raised when plugin runtime operations are requested before connection.""" -class PluginRuntimeConnector: +class PluginRuntimeConnector(ManagedRuntimeConnector): """Plugin runtime connector""" - ap: app.Application - handler: handler.RuntimeConnectionHandler handler_task: asyncio.Task @@ -54,10 +53,6 @@ class PluginRuntimeConnector: ctrl: stdio_client_controller.StdioClientController | ws_client_controller.WebSocketClientController - runtime_subprocess_on_windows: asyncio.subprocess.Process | None = None - - runtime_subprocess_on_windows_task: asyncio.Task | None = None - runtime_disconnect_callback: typing.Callable[ [PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None] ] @@ -72,7 +67,7 @@ def __init__( [PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None] ], ): - self.ap = ap + super().__init__(ap) self.runtime_disconnect_callback = runtime_disconnect_callback self.is_enable_plugin = self.ap.instance_config.data.get('plugin', {}).get('enable', True) @@ -140,19 +135,7 @@ async def make_connection_failed_callback( # We have to launch runtime via cmd but communicate via ws. 
self.ap.logger.info('(windows) use cmd to launch plugin runtime and communicate via ws') - if self.runtime_subprocess_on_windows is None: # only launch once - python_path = sys.executable - env = os.environ.copy() - self.runtime_subprocess_on_windows = await asyncio.create_subprocess_exec( - python_path, - '-m', - 'langbot_plugin.cli.__init__', - 'rt', - env=env, - ) - - # hold the process - self.runtime_subprocess_on_windows_task = asyncio.create_task(self.runtime_subprocess_on_windows.wait()) + await self._start_runtime_subprocess('-m', 'langbot_plugin.cli.__init__', 'rt') ws_url = 'ws://localhost:5400/control/ws' @@ -236,6 +219,88 @@ def _inspect_plugin_package( return plugin_author, plugin_name + async def _install_mcp_from_marketplace( + self, + mcp_data: dict[str, Any], + task_context: taskmgr.TaskContext | None = None, + ): + """Install an MCP server from marketplace data.""" + from ..entity.persistence import mcp as persistence_mcp + import uuid + + config = mcp_data.get('config', {}) + url = config.get('url', '') + # Use __ instead of / to avoid URL routing issues with slashes + name = f'{mcp_data.get("author", "")}__{mcp_data.get("name", "")}' + + # Determine mode from URL + if 'sse' in url.lower(): + mode = 'sse' + elif url.startswith('http'): + mode = 'http' + else: + mode = 'stdio' + + # Build extra_args from config + extra_args = { + 'url': url, + 'timeout': config.get('timeout', 30), + 'sse_read_timeout': config.get('sse_read_timeout', 300), + } + + # Check if MCP server already exists + existing = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_mcp.MCPServer).where(persistence_mcp.MCPServer.name == name) + ) + if existing.scalar_one_or_none(): + self.ap.logger.info(f'MCP server {name} already exists, skipping installation') + return + + # Create MCP server record + server_uuid = str(uuid.uuid4()) + server_data = { + 'uuid': server_uuid, + 'name': name, + 'enable': True, + 'mode': mode, + 'extra_args': extra_args, + } + + 
await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_mcp.MCPServer).values(server_data)) + + # Start the MCP server + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_mcp.MCPServer).where(persistence_mcp.MCPServer.uuid == server_uuid) + ) + server_entity = result.first() + if server_entity: + server_config = self.ap.persistence_mgr.serialize_model(persistence_mcp.MCPServer, server_entity) + if self.ap.tool_mgr.mcp_tool_loader: + mcp_task = asyncio.create_task(self.ap.tool_mgr.mcp_tool_loader.host_mcp_server(server_config)) + self.ap.tool_mgr.mcp_tool_loader._hosted_mcp_tasks.append(mcp_task) + + self.ap.logger.info(f'Installed MCP server {name} from marketplace') + + async def _install_skill_from_zip( + self, + file_bytes: bytes, + filename: str, + task_context: taskmgr.TaskContext | None = None, + ): + """Install a skill from marketplace ZIP data.""" + from ..api.http.service.skill import SkillService + + skill_service = SkillService(self.ap) + + self.ap.logger.info(f'Installing skill from marketplace ZIP ({len(file_bytes)} bytes)') + + # Install from ZIP using skill service + result = await skill_service.install_from_zip_upload( + file_bytes=file_bytes, + filename=filename + '.zip', + ) + self.ap.logger.info(f'Skill installed successfully: {result}') + def _build_plugin_startup_failure_message( self, plugin_author: str, @@ -298,6 +363,110 @@ async def install_plugin( plugin_author = install_info.get('plugin_author') plugin_name = install_info.get('plugin_name') + if install_source == PluginInstallSource.MARKETPLACE: + # Handle marketplace plugin/mcp/skill installation + plugin_author = install_info.get('plugin_author', '') + plugin_name = install_info.get('plugin_name', '') + space_url = ( + self.ap.instance_config.data.get('space', {}).get('url', 'https://space.langbot.app').rstrip('/') + ) + + # Try MCP endpoint first + async with httpx.AsyncClient(trust_env=True, timeout=15) as client: + mcp_resp = 
await client.get(f'{space_url}/api/v1/marketplace/mcps/{plugin_author}/{plugin_name}') + if mcp_resp.status_code == 200: + mcp_data = mcp_resp.json().get('data', {}).get('mcp', {}) + if mcp_data.get('config'): + # It's an MCP - create server locally + self.ap.logger.info(f'Installing MCP from marketplace: {plugin_author}/{plugin_name}') + if task_context: + task_context.set_current_action('installing mcp server') + await self._install_mcp_from_marketplace(mcp_data, task_context) + return + else: + raise Exception(f'MCP {plugin_author}/{plugin_name} has no config') + elif mcp_resp.status_code == 404: + # Try skill endpoint - download ZIP and install + self.ap.logger.info(f'Trying skill endpoint for: {plugin_author}/{plugin_name}') + if task_context: + task_context.set_current_action('checking skill marketplace') + + # Get skill detail to find version + skill_resp = await client.get( + f'{space_url}/api/v1/marketplace/skills/{plugin_author}/{plugin_name}' + ) + if skill_resp.status_code == 200: + self.ap.logger.info(f'Installing skill from marketplace: {plugin_author}/{plugin_name}') + if task_context: + task_context.set_current_action('installing skill from marketplace') + + # Download the skill ZIP (no version needed - uses latest) + if task_context: + task_context.set_current_action('downloading skill package') + + download_resp = await client.get( + f'{space_url}/api/v1/marketplace/skills/download/{plugin_author}/{plugin_name}' + ) + if download_resp.status_code != 200: + raise Exception( + f'Failed to download skill {plugin_author}/{plugin_name}: {download_resp.status_code}' + ) + + file_bytes = download_resp.content + file_size = len(file_bytes) + self.ap.logger.info(f'Downloaded skill ZIP ({file_size} bytes)') + + # Install skill from ZIP using skill service + await self._install_skill_from_zip(file_bytes, f'{plugin_author}-{plugin_name}', task_context) + return + elif skill_resp.status_code == 404: + # Try plugin endpoint - get versions and download + 
self.ap.logger.info(f'Trying plugin endpoint for: {plugin_author}/{plugin_name}') + if task_context: + task_context.set_current_action('checking plugin marketplace') + + # Get plugin versions to find latest + versions_resp = await client.get( + f'{space_url}/api/v1/marketplace/plugins/{plugin_author}/{plugin_name}/versions' + ) + if versions_resp.status_code == 200: + versions_data = versions_resp.json().get('data', {}).get('versions', []) + if versions_data: + latest_version = versions_data[0].get('version', '') + if latest_version: + self.ap.logger.info( + f'Installing plugin from marketplace: {plugin_author}/{plugin_name} v{latest_version}' + ) + if task_context: + task_context.set_current_action('downloading plugin package') + + download_resp = await client.get( + f'{space_url}/api/v1/marketplace/plugins/download/{plugin_author}/{plugin_name}/{latest_version}' + ) + if download_resp.status_code != 200: + raise Exception( + f'Failed to download plugin {plugin_author}/{plugin_name}: {download_resp.status_code}' + ) + + file_bytes = download_resp.content + self._extract_deps_metadata(file_bytes, task_context) + file_key = await self.handler.send_file(file_bytes, 'lbpkg') + install_info['plugin_file_key'] = file_key + self.ap.logger.info(f'Transfered file {file_key} to plugin runtime') + # Continue to install via runtime + else: + raise Exception(f'No version found for plugin {plugin_author}/{plugin_name}') + else: + raise Exception(f'Plugin {plugin_author}/{plugin_name} has no versions') + else: + raise Exception(f'Plugin {plugin_author}/{plugin_name} not found in marketplace') + else: + skill_resp.raise_for_status() + raise Exception(f'Failed to get skill {plugin_author}/{plugin_name}') + else: + mcp_resp.raise_for_status() + raise Exception(f'Failed to get MCP {plugin_author}/{plugin_name}') + if install_source == PluginInstallSource.LOCAL: # transfer file before install file_bytes = install_info['plugin_file'] @@ -613,13 +782,18 @@ async def retrieve_knowledge( 
return await self.handler.retrieve_knowledge(plugin_author, plugin_name, retriever_name, retrieval_context) def dispose(self): - # No need to consider the shutdown on Windows - # for Windows can kill processes and subprocesses chainly - - if self.is_enable_plugin and isinstance(self.ctrl, stdio_client_controller.StdioClientController): + # On non-Windows stdio mode, terminate via the controller's process handle. + # On Windows, the managed subprocess is cleaned up by the base class. + if ( + self.is_enable_plugin + and hasattr(self, 'ctrl') + and isinstance(self.ctrl, stdio_client_controller.StdioClientController) + ): self.ap.logger.info('Terminating plugin runtime process...') self.ctrl.process.terminate() + self._dispose_subprocess() + if self.heartbeat_task is not None: self.heartbeat_task.cancel() self.heartbeat_task = None diff --git a/src/langbot/pkg/provider/runner.py b/src/langbot/pkg/provider/runner.py index f89c079df..987b3a0e9 100644 --- a/src/langbot/pkg/provider/runner.py +++ b/src/langbot/pkg/provider/runner.py @@ -2,8 +2,12 @@ import abc import typing +from typing import TYPE_CHECKING -from ..core import app +if TYPE_CHECKING: + from ..core import app + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + import langbot_plugin.api.entities.builtin.provider.message as provider_message preregistered_runners: list[typing.Type[RequestRunner]] = [] @@ -35,7 +39,7 @@ def __init__(self, ap: app.Application, pipeline_config: dict): @abc.abstractmethod async def run( - self, query: core_entities.Query - ) -> typing.AsyncGenerator[llm_entities.Message | llm_entities.MessageChunk, None]: + self, query: pipeline_query.Query + ) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]: """运行请求""" pass diff --git a/src/langbot/pkg/provider/runners/localagent.py b/src/langbot/pkg/provider/runners/localagent.py index b48e9cc3b..6cac3e830 100644 --- a/src/langbot/pkg/provider/runners/localagent.py +++ 
b/src/langbot/pkg/provider/runners/localagent.py @@ -5,6 +5,7 @@ import typing from .. import runner from ..modelmgr import requester as modelmgr_requester +from ..tools.loaders.native import EXEC_TOOL_NAME import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query import langbot_plugin.api.entities.builtin.provider.message as provider_message import langbot_plugin.api.entities.builtin.rag.context as rag_context @@ -24,11 +25,37 @@ """ +SANDBOX_EXEC_TOOL_NAME = 'sandbox_exec' +SANDBOX_EXEC_SYSTEM_GUIDANCE = ( + 'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, ' + 'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, ' + 'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec ' + 'and then answer from the tool result.' +) + @runner.runner_class('local-agent') class LocalAgentRunner(runner.RequestRunner): """Local agent request runner""" + def _build_request_messages( + self, + query: pipeline_query.Query, + user_message: provider_message.Message, + ) -> list[provider_message.Message]: + req_messages = query.prompt.messages.copy() + query.messages.copy() + + if any(getattr(tool, 'name', None) == EXEC_TOOL_NAME for tool in query.use_funcs or []): + req_messages.append( + provider_message.Message( + role='system', + content=self.ap.box_service.get_system_guidance(), + ) + ) + + req_messages.append(user_message) + return req_messages + async def _get_model_candidates( self, query: pipeline_query.Query, @@ -131,6 +158,7 @@ async def run( ) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]: """Run request""" pending_tool_calls = [] + initial_response_emitted = False # Get knowledge bases list from query variables (set by PreProcessor, # may have been modified by plugins during PromptPreProcessing) @@ -236,7 +264,7 @@ async def run( ce.text = final_user_message_text 
break - req_messages = query.prompt.messages.copy() + query.messages.copy() + [user_message] + req_messages = self._build_request_messages(query, user_message) try: is_stream = await query.adapter.is_stream_output_supported() @@ -264,7 +292,6 @@ async def run( query.use_funcs, remove_think, ) - yield msg final_msg = msg else: # Streaming: invoke with fallback @@ -312,6 +339,7 @@ async def run( is_final=msg.is_final, msg_sequence=msg_sequence, ) + initial_response_emitted = True final_msg = provider_message.MessageChunk( role=last_role, @@ -325,6 +353,12 @@ async def run( if isinstance(final_msg, provider_message.MessageChunk): first_end_sequence = final_msg.msg_sequence + if not is_stream: + yield final_msg + elif not initial_response_emitted: + yield final_msg + initial_response_emitted = True + req_messages.append(final_msg) # Once a model succeeds, commit to it for the tool call loop @@ -369,7 +403,15 @@ async def run( req_messages.append(msg) except Exception as e: - err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id) + if is_stream: + err_msg = provider_message.MessageChunk( + role='tool', + content=f'err: {e}', + tool_call_id=tool_call.id, + is_final=True, + ) + else: + err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id) yield err_msg diff --git a/src/langbot/pkg/provider/tools/loader.py b/src/langbot/pkg/provider/tools/loader.py index 4719d9bb5..e90f07b32 100644 --- a/src/langbot/pkg/provider/tools/loader.py +++ b/src/langbot/pkg/provider/tools/loader.py @@ -2,12 +2,14 @@ import abc import typing +from typing import TYPE_CHECKING from langbot_plugin.api.entities.events import pipeline_query - -from ...core import app import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +if TYPE_CHECKING: + from ...core import app + preregistered_loaders: list[typing.Type[ToolLoader]] = [] diff --git a/src/langbot/pkg/provider/tools/loaders/mcp.py 
b/src/langbot/pkg/provider/tools/loaders/mcp.py index 46d63b847..5269e6da0 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp.py @@ -20,6 +20,7 @@ import langbot_plugin.api.entities.builtin.resource.tool as resource_tool import langbot_plugin.api.entities.builtin.provider.message as provider_message from ....entity.persistence import mcp as persistence_mcp +from .mcp_stdio import BoxStdioSessionRuntime, MCPServerBoxConfig, MCPSessionErrorPhase # noqa: F401 class MCPSessionStatus(enum.Enum): @@ -58,6 +59,12 @@ class RuntimeMCPSession: error_message: str | None = None + error_phase: MCPSessionErrorPhase | None = None + + retry_count: int = 0 + + _box_stdio_runtime: BoxStdioSessionRuntime + def __init__(self, server_name: str, server_config: dict, enable: bool, ap: app.Application): self.server_name = server_name self.server_uuid = server_config.get('uuid', '') @@ -75,7 +82,33 @@ def __init__(self, server_name: str, server_config: dict, enable: bool, ap: app. self._shutdown_event = asyncio.Event() self._ready_event = asyncio.Event() + self._box_stdio_runtime = BoxStdioSessionRuntime(self) + self.box_config = self._box_stdio_runtime.config + async def _init_stdio_python_server(self): + if self._uses_box_stdio(): + await self._box_stdio_runtime.initialize() + return + + # Box is configured (ap.box_service exists) but currently unavailable + # (disabled by config or connection failed). Refuse stdio MCP rather + # than silently falling through to host-stdio — the operator asked + # for the sandbox and the failure mode should be visible. + # + # Set ``error_phase = BOX_UNAVAILABLE`` BEFORE raising so the retry + # wrapper can short-circuit (retrying is pointless when Box is + # deliberately off) and the frontend can render a localized, + # actionable message instead of this raw RuntimeError. Keep the + # message itself short — the frontend ignores it for this phase. 
+ box_service = getattr(self.ap, 'box_service', None) + if box_service is not None and not getattr(box_service, 'available', False): + self.error_phase = MCPSessionErrorPhase.BOX_UNAVAILABLE + if not getattr(box_service, 'enabled', True): + raise RuntimeError('box_disabled_in_config') + raise RuntimeError('box_unavailable') + + # Legacy: no box_service installed at all (pre-Box dev mode). Fall + # through to host-stdio for backward compatibility. server_params = StdioServerParameters( command=self.server_config['command'], args=self.server_config['args'], @@ -90,6 +123,9 @@ async def _init_stdio_python_server(self): await self.session.initialize() + async def _init_box_stdio_server(self): + await self._box_stdio_runtime.initialize() + async def _init_sse_server(self): sse_transport = await self.exit_stack.enter_async_context( sse_client( @@ -124,8 +160,11 @@ async def _init_streamable_http_server(self): await self.session.initialize() + _MAX_RETRIES = 3 + _RETRY_DELAYS = [2, 4, 8] + async def _lifecycle_loop(self): - """在后台任务中管理整个MCP会话的生命周期""" + """Manage the full MCP session lifecycle in a background task.""" try: if self.server_config['mode'] == 'stdio': await self._init_stdio_python_server() @@ -134,49 +173,109 @@ async def _lifecycle_loop(self): elif self.server_config['mode'] == 'http': await self._init_streamable_http_server() else: - raise ValueError(f'无法识别 MCP 服务器类型: {self.server_name}: {self.server_config}') + raise ValueError(f'Unknown MCP server mode: {self.server_name}: {self.server_config}') await self.refresh() self.status = MCPSessionStatus.CONNECTED - # 通知start()方法连接已建立 + # Notify start() that connection is established self._ready_event.set() - # 等待shutdown信号 - await self._shutdown_event.wait() + # Wait for shutdown signal, with optional health monitoring for Box stdio + if self._uses_box_stdio(): + monitor_task = asyncio.create_task(self._box_stdio_runtime.monitor_process_health()) + shutdown_task = asyncio.create_task(self._shutdown_event.wait()) 
+ done, pending = await asyncio.wait( + [shutdown_task, monitor_task], + return_when=asyncio.FIRST_COMPLETED, + ) + for task in pending: + task.cancel() + for task in done: + if task is monitor_task and not self._shutdown_event.is_set(): + self.error_phase = MCPSessionErrorPhase.RUNTIME + raise Exception('Box managed process exited unexpectedly') + else: + await self._shutdown_event.wait() except Exception as e: self.status = MCPSessionStatus.ERROR self.error_message = str(e) self.ap.logger.error(f'Error in MCP session lifecycle {self.server_name}: {e}\n{traceback.format_exc()}') - # 即使出错也要设置ready事件,让start()方法知道初始化已完成 - self._ready_event.set() + # Do NOT set _ready_event here — let _lifecycle_loop_with_retry + # handle retries first. It will set the event when all retries + # are exhausted or on success. + raise # Re-raise so _lifecycle_loop_with_retry can catch it finally: - # 在同一个任务中清理所有资源 + # Clean up all resources in the same task try: if self.exit_stack: await self.exit_stack.aclose() + self.exit_stack = AsyncExitStack() self.functions.clear() self.session = None except Exception as e: self.ap.logger.error(f'Error cleaning up MCP session {self.server_name}: {e}\n{traceback.format_exc()}') + finally: + await self._cleanup_box_stdio_session() + + async def _lifecycle_loop_with_retry(self): + """Wrap _lifecycle_loop with retry and exponential backoff.""" + for attempt in range(self._MAX_RETRIES + 1): + try: + await self._lifecycle_loop() + return # Normal shutdown, don't retry + except Exception as e: + self.retry_count = attempt + 1 + if self._shutdown_event.is_set(): + return # Shutdown requested, don't retry + # BOX_UNAVAILABLE is a deliberate refusal, not a transient + # failure — retrying produces log spam and a misleading + # "Failed after N attempts" message. Surface it immediately. 
+ if self.error_phase == MCPSessionErrorPhase.BOX_UNAVAILABLE: + self.status = MCPSessionStatus.ERROR + self.error_message = str(e) + self._ready_event.set() + return + if attempt >= self._MAX_RETRIES: + self.status = MCPSessionStatus.ERROR + self.error_message = f'Failed after {self._MAX_RETRIES + 1} attempts: {e}' + self._ready_event.set() + return + delay = self._RETRY_DELAYS[attempt] + self.ap.logger.warning( + f'MCP session {self.server_name} failed (attempt {attempt + 1}), retrying in {delay}s: {e}' + ) + await self._cleanup_box_stdio_session() + # Reset status for retry + self.status = MCPSessionStatus.CONNECTING + self.error_message = None + self.error_phase = None + await asyncio.sleep(delay) + + _MONITOR_POLL_INTERVAL = 5 + _MONITOR_MAX_CONSECUTIVE_ERRORS = 3 + + async def _monitor_box_process_health(self): + await self._box_stdio_runtime.monitor_process_health() async def start(self): if not self.enable: return - # 创建后台任务来管理生命周期 - self._lifecycle_task = asyncio.create_task(self._lifecycle_loop()) + # Create background task for lifecycle management with retry + self._lifecycle_task = asyncio.create_task(self._lifecycle_loop_with_retry()) - # 等待连接建立或失败(带超时) + # Wait for connection or failure (with timeout) + startup_timeout = (self.box_config.startup_timeout_sec + 30) if self._uses_box_stdio() else 30.0 try: - await asyncio.wait_for(self._ready_event.wait(), timeout=30.0) + await asyncio.wait_for(self._ready_event.wait(), timeout=startup_timeout) except asyncio.TimeoutError: self.status = MCPSessionStatus.ERROR - raise Exception('Connection timeout after 30 seconds') + raise Exception(f'Connection timeout after {startup_timeout} seconds') - # 检查是否有错误 + # Check for errors if self.status == MCPSessionStatus.ERROR: raise Exception('Connection failed, please check URL') @@ -232,18 +331,25 @@ def get_tools(self) -> list[resource_tool.LLMTool]: return self.functions def get_runtime_info_dict(self) -> dict: - return { + info = { 'status': self.status.value, 
'error_message': self.error_message, + 'error_phase': self.error_phase.value if self.error_phase else None, + 'retry_count': self.retry_count, 'tool_count': len(self.get_tools()), 'tools': [ { 'name': tool.name, 'description': tool.description, + 'parameters': tool.parameters, } for tool in self.get_tools() ], } + if self._uses_box_stdio(): + info['box_session_id'] = self._build_box_session_id() + info['box_enabled'] = True + return info async def shutdown(self): """关闭会话并清理资源""" @@ -267,6 +373,41 @@ async def shutdown(self): except Exception as e: self.ap.logger.error(f'Error shutting down MCP session {self.server_name}: {e}\n{traceback.format_exc()}') + def _uses_box_stdio(self) -> bool: + return self._box_stdio_runtime.uses_box_stdio() + + def _build_box_session_id(self) -> str: + return 'mcp-shared' + + def _rewrite_path(self, path: str, host_path: str | None) -> str: + return self._box_stdio_runtime.rewrite_path(path, host_path) + + def _infer_host_path(self) -> str | None: + return self._box_stdio_runtime.infer_host_path() + + @staticmethod + def _unwrap_venv_path(directory: str) -> str: + return BoxStdioSessionRuntime.unwrap_venv_path(directory) + + def _resolve_host_path(self) -> str | None: + return self._box_stdio_runtime.resolve_host_path() + + @staticmethod + def _detect_install_command(host_path: str) -> str | None: + return BoxStdioSessionRuntime.detect_install_command(host_path) + + def _build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict: + return self._box_stdio_runtime.build_box_session_payload(session_id, host_path) + + def _build_box_process_payload(self, host_path: str | None = None) -> dict: + return self._box_stdio_runtime.build_box_process_payload(host_path) + + def _rewrite_venv_command(self, command: str, host_path: str) -> str: + return self._box_stdio_runtime.rewrite_venv_command(command, host_path) + + async def _cleanup_box_stdio_session(self) -> None: + await self._box_stdio_runtime.cleanup_session() 
+ # @loader.loader_class('mcp') class MCPLoader(loader.ToolLoader): @@ -332,7 +473,7 @@ async def load_mcp_server(self, server_config: dict) -> RuntimeMCPSession: Args: server_config: 服务器配置字典,必须包含: - name: 服务器名称 - - mode: 连接模式 (stdio/sse) + - mode: 连接模式 (stdio/sse/http) - enable: 是否启用 - extra_args: 额外的配置参数 (可选) """ @@ -431,12 +572,13 @@ def get_all_servers_info(self) -> dict[str, dict]: """获取所有服务器的信息""" info = {} for server_name, session in self.sessions.items(): + tools = session.get_tools() info[server_name] = { 'name': server_name, 'mode': session.server_config.get('mode'), 'enable': session.enable, - 'tools_count': len(session.get_tools()), - 'tool_names': [f.name for f in session.get_tools()], + 'tools_count': len(tools), + 'tool_names': [f.name for f in tools], } return info diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py new file mode 100644 index 000000000..bdddcd294 --- /dev/null +++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +import enum +import asyncio +import os +import shutil +import shlex +from typing import TYPE_CHECKING, Any + +import pydantic +from mcp import ClientSession +from mcp.client.websocket import websocket_client +from ....box.workspace import ( + BoxWorkspaceSession, + classify_python_workspace, + infer_workspace_host_path, + normalize_host_path, + rewrite_mounted_path, + rewrite_venv_command, + unwrap_venv_path, +) + +if TYPE_CHECKING: + from .mcp import RuntimeMCPSession + + +class MCPSessionErrorPhase(enum.Enum): + """Which phase of the MCP lifecycle failed.""" + + SESSION_CREATE = 'session_create' + DEP_INSTALL = 'dep_install' + PROCESS_START = 'process_start' + RELAY_CONNECT = 'relay_connect' + MCP_INIT = 'mcp_init' + RUNTIME = 'runtime' + TOOL_CALL = 'tool_call' + # Stdio MCP refused because Box is disabled in config or currently + # unavailable. Not transient — retries would be pointless. 
The frontend + # uses this phase to render a localized actionable message instead of + # the raw RuntimeError text. + BOX_UNAVAILABLE = 'box_unavailable' + + +class MCPServerBoxConfig(pydantic.BaseModel): + """Structured configuration for running an MCP server inside a Box container.""" + + image: str | None = None + network: str = 'on' # MCP servers need network for dependency installation + host_path: str | None = None + host_path_mode: str = 'ro' # Read-only by default; mounted read-write only when explicitly requested + env: dict[str, str] = pydantic.Field(default_factory=dict) + startup_timeout_sec: int = 120 # Longer default to allow dependency bootstrap + cpus: float | None = None + memory_mb: int | None = None + pids_limit: int | None = None + read_only_rootfs: bool | None = None + + model_config = pydantic.ConfigDict(extra='ignore') + + +class BoxStdioSessionRuntime: + """Encapsulate Box-backed stdio MCP session orchestration.""" + + def __init__(self, owner: RuntimeMCPSession): + self.owner = owner + self.config = MCPServerBoxConfig.model_validate(owner.server_config.get('box', {})) + + @property + def ap(self): + return self.owner.ap + + @property + def server_name(self) -> str: + return self.owner.server_name + + @property + def server_config(self) -> dict: + return self.owner.server_config + + def _build_workspace( + self, + *, + host_path: str | None | object = ..., + workdir: str = '/workspace', + mount_path: str = '/workspace', + ) -> BoxWorkspaceSession: + resolved_host_path = self.resolve_host_path() if host_path is ... 
else host_path + return BoxWorkspaceSession( + self.ap.box_service, + self.owner._build_box_session_id(), + host_path=resolved_host_path, + host_path_mode=self.config.host_path_mode, + workdir=workdir, + env=self.config.env, + mount_path=mount_path, + network=self.config.network, + read_only_rootfs=self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False, + image=self.config.image, + cpus=self.config.cpus, + memory_mb=self.config.memory_mb, + pids_limit=self.config.pids_limit, + persistent=True, + ) + + @property + def process_id(self) -> str: + """Each MCP server gets a unique process_id within the shared session.""" + return self.owner.server_uuid + + def uses_box_stdio(self) -> bool: + if self.server_config.get('mode') != 'stdio': + return False + box_service = getattr(self.ap, 'box_service', None) + if box_service is None: + return False + # When Box is configured but currently unavailable (disabled or + # connection failed), do NOT silently fall through to host-stdio — + # that would bypass the sandbox the operator asked for. The caller + # is expected to refuse the stdio MCP server with a clear error. + return bool(getattr(box_service, 'available', False)) + + async def initialize(self) -> None: + await self._wait_for_box_runtime() + + # All stdio MCP servers share one Box session. Per-server host paths + # are staged into the shared workspace instead of becoming session + # mounts, because an existing Docker container cannot add bind mounts. 
+ workspace = self._build_workspace(host_path=None) + host_path = self.resolve_host_path() + process_cwd = '/workspace' + + try: + await workspace.create_session() + except Exception: + self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE + raise + + if host_path: + process_cwd = await self._stage_host_path_to_shared_workspace(host_path) + install_cmd = self.detect_install_command(host_path, process_cwd) + if install_cmd: + self.ap.logger.info( + f'MCP server {self.server_name}: installing dependencies in Box with: {install_cmd}' + ) + try: + result = await workspace.execute_raw( + install_cmd, + workdir=process_cwd, + timeout_sec=self.config.startup_timeout_sec or 120, + ) + except Exception: + self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL + raise + if not result.ok: + self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL + stderr_preview = (result.stderr or '')[:500] + raise Exception(f'Dependency install failed (exit code {result.exit_code}): {stderr_preview}') + + try: + process_workspace = ( + self._build_workspace(host_path=host_path, workdir=process_cwd, mount_path=process_cwd) + if host_path + else workspace + ) + payload = process_workspace.build_process_payload( + self.server_config['command'], + self.server_config.get('args', []), + env=self.server_config.get('env', {}), + cwd=process_cwd, + ) + payload['process_id'] = self.process_id + await workspace.box_service.start_managed_process(workspace.session_id, payload) + except Exception: + self.owner.error_phase = MCPSessionErrorPhase.PROCESS_START + raise + + try: + websocket_url = workspace.get_managed_process_websocket_url(self.process_id) + transport = await self.owner.exit_stack.enter_async_context(websocket_client(websocket_url)) + read_stream, write_stream = transport + self.owner.session = await self.owner.exit_stack.enter_async_context( + ClientSession(read_stream, write_stream) + ) + except Exception: + self.owner.error_phase = MCPSessionErrorPhase.RELAY_CONNECT + raise + 
+ try: + await self.owner.session.initialize() + except Exception: + self.owner.error_phase = MCPSessionErrorPhase.MCP_INIT + raise + + async def monitor_process_health(self) -> None: + from langbot_plugin.box.models import BoxManagedProcessStatus + + workspace = self._build_workspace() + consecutive_errors = 0 + while not self.owner._shutdown_event.is_set(): + try: + info = await workspace.get_managed_process(self.process_id) + if isinstance(info, dict): + status = info.get('status', '') + else: + status = getattr(info, 'status', '') + if status == BoxManagedProcessStatus.EXITED.value or status == BoxManagedProcessStatus.EXITED: + return + consecutive_errors = 0 + except Exception as exc: + consecutive_errors += 1 + self.ap.logger.warning( + f'MCP monitor for {self.server_name}: get_managed_process failed ' + f'({consecutive_errors}/{self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS}): ' + f'{type(exc).__name__}: {exc}' + ) + if consecutive_errors >= self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS: + return + await asyncio.sleep(self.owner._MONITOR_POLL_INTERVAL) + + async def _stage_host_path_to_shared_workspace(self, host_path: str) -> str: + source_path = normalize_host_path(host_path) + if not source_path: + return '/workspace' + if not os.path.isdir(source_path): + raise FileNotFoundError(f'MCP host_path does not exist or is not a directory: {host_path}') + + self._validate_host_path(source_path) + + shared_host_path = self._shared_workspace_host_path() + process_host_root = os.path.join(shared_host_path, '.mcp', self.process_id) + process_host_workspace = os.path.join(process_host_root, 'workspace') + await asyncio.to_thread(self._copy_workspace_tree, source_path, process_host_root, process_host_workspace) + return f'/workspace/.mcp/{self.process_id}/workspace' + + def _validate_host_path(self, host_path: str) -> None: + self.ap.box_service.build_spec( + { + 'session_id': f'mcp-validate-{self.process_id}', + 'host_path': host_path, + 'host_path_mode': 
self.config.host_path_mode, + 'network': self.config.network, + 'read_only_rootfs': self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False, + } + ) + + def _shared_workspace_host_path(self) -> str: + default_workspace = getattr(self.ap.box_service, 'default_workspace', None) + if not default_workspace: + raise RuntimeError('Box default workspace is required for shared MCP host_path staging') + shared_host_path = normalize_host_path(default_workspace) + os.makedirs(shared_host_path, exist_ok=True) + return shared_host_path + + @staticmethod + def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None: + shutil.rmtree(process_host_root, ignore_errors=True) + os.makedirs(process_host_root, exist_ok=True) + shutil.copytree( + source_path, + process_host_workspace, + symlinks=True, + ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'), + ) + + async def _cleanup_staged_workspace(self) -> None: + if not self.resolve_host_path(): + return + try: + process_host_root = os.path.join(self._shared_workspace_host_path(), '.mcp', self.process_id) + await asyncio.to_thread(shutil.rmtree, process_host_root, True) + except Exception as exc: + self.ap.logger.warning( + f'MCP server {self.server_name}: failed to clean staged workspace ' + f'process_id={self.process_id}: {type(exc).__name__}: {exc}' + ) + + async def _wait_for_box_runtime(self) -> None: + timeout_sec = max(float(self.config.startup_timeout_sec or 120), 1.0) + deadline = asyncio.get_running_loop().time() + timeout_sec + warned = False + while not getattr(self.ap.box_service, 'available', False): + if not warned: + self.ap.logger.warning( + f'MCP server {self.server_name}: waiting for Box runtime before starting stdio process' + ) + warned = True + if asyncio.get_running_loop().time() >= deadline: + self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE + raise Exception(f'Box runtime is not 
available after {int(timeout_sec)} seconds') + await asyncio.sleep(1) + + async def cleanup_session(self) -> None: + if not self.uses_box_stdio(): + return + + # In the shared-session model, we do not delete the session itself. + # Stop only this MCP server's managed process; deleting the session + # would kill other MCP servers sharing the same container. + workspace = self._build_workspace(host_path=None) + try: + await workspace.stop_managed_process(self.process_id) + except Exception as exc: + self.ap.logger.warning( + f'MCP server {self.server_name}: failed to stop managed process ' + f'process_id={self.process_id}: {type(exc).__name__}: {exc}' + ) + await self._cleanup_staged_workspace() + return + await self._cleanup_staged_workspace() + self.ap.logger.info( + f'MCP server {self.server_name}: stopped process_id={self.process_id} ' + f'(shared session {self.owner._build_box_session_id()} kept alive)' + ) + + def rewrite_path(self, path: str, host_path: str | None) -> str: + return rewrite_mounted_path(path, host_path) + + def infer_host_path(self) -> str | None: + return infer_workspace_host_path(self.server_config.get('command', ''), self.server_config.get('args', [])) + + @staticmethod + def unwrap_venv_path(directory: str) -> str: + return unwrap_venv_path(directory) + + def resolve_host_path(self) -> str | None: + return self.config.host_path or self.infer_host_path() + + @staticmethod + def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None: + workspace_kind = classify_python_workspace(host_path) + quoted_workspace_path = shlex.quote(workspace_path) + if workspace_kind == 'package': + return ( + 'mkdir -p /opt/_lb_src' + f' && tar -C {quoted_workspace_path}' + ' --exclude=.venv --exclude=.git --exclude=__pycache__' + ' --exclude=node_modules --exclude=.tox --exclude=.nox' + ' --exclude="*.egg-info" --exclude=.uv-cache' + ' -cf - .' 
+ ' | tar -C /opt/_lb_src -xf -' + ' && pip install --no-cache-dir /opt/_lb_src' + ' && rm -rf /opt/_lb_src' + ) + if workspace_kind == 'requirements': + return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt' + return None + + def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]: + workspace = self._build_workspace() + workspace.session_id = session_id + if host_path is not None: + workspace.host_path = host_path + return workspace.build_session_payload() + + def build_box_process_payload(self, host_path: str | None = None) -> dict[str, Any]: + workspace = self._build_workspace() + if host_path is not None: + workspace.host_path = host_path + return workspace.build_process_payload( + self.server_config['command'], + self.server_config.get('args', []), + env=self.server_config.get('env', {}), + ) + + def rewrite_venv_command(self, command: str, host_path: str) -> str: + return rewrite_venv_command(command, host_path) diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py new file mode 100644 index 000000000..d6ef11d11 --- /dev/null +++ b/src/langbot/pkg/provider/tools/loaders/native.py @@ -0,0 +1,846 @@ +from __future__ import annotations + +import json +import os + +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +from langbot_plugin.api.entities.events import pipeline_query + +from .. import loader +from . 
import skill as skill_loader + +EXEC_TOOL_NAME = 'exec' +READ_TOOL_NAME = 'read' +WRITE_TOOL_NAME = 'write' +EDIT_TOOL_NAME = 'edit' +GLOB_TOOL_NAME = 'glob' +GREP_TOOL_NAME = 'grep' + +_ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NAME, GLOB_TOOL_NAME, GREP_TOOL_NAME} + +# Skip these dirs during glob/grep walks to avoid noise +_SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'} + + +class NativeToolLoader(loader.ToolLoader): + def __init__(self, ap): + super().__init__(ap) + self._tools: list[resource_tool.LLMTool] | None = None + self._backend_available: bool | None = None + + async def initialize(self): + """Check if backend is truly available at startup.""" + self._backend_available = await self._check_backend_available() + if self._backend_available: + self.ap.logger.info('Native sandbox tools (exec/read/write/edit/glob/grep) are available.') + else: + self.ap.logger.warning( + 'Native sandbox tools (exec/read/write/edit/glob/grep) are NOT available. ' + 'No sandbox backend (Docker/nsjail/E2B) is ready. ' + 'The LLM will not have access to code execution or file operation tools.' 
+ ) + + async def _check_backend_available(self) -> bool: + """Check if the box backend is truly available (not just the runtime).""" + box_service = getattr(self.ap, 'box_service', None) + if box_service is None: + return False + if not getattr(box_service, 'available', False): + return False + # Check if backend is truly available via get_status + try: + status = await box_service.get_status() + backend_info = status.get('backend', {}) + return backend_info.get('available', False) + except Exception: + return False + + async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]: + if not self._is_sandbox_available(): + return [] + if self._tools is None: + self._tools = [ + self._build_exec_tool(), + self._build_read_tool(), + self._build_write_tool(), + self._build_edit_tool(), + self._build_glob_tool(), + self._build_grep_tool(), + ] + return list(self._tools) + + async def has_tool(self, name: str) -> bool: + return name in _ALL_TOOL_NAMES and self._is_sandbox_available() + + async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query): + if name == EXEC_TOOL_NAME: + self.ap.logger.info( + 'exec tool invoked: ' + f'query_id={query.query_id} ' + f'parameters={json.dumps(self._summarize_parameters(parameters), ensure_ascii=False)}' + ) + return await self._invoke_exec(parameters, query) + if name == READ_TOOL_NAME: + return await self._invoke_read(parameters, query) + if name == WRITE_TOOL_NAME: + return await self._invoke_write(parameters, query) + if name == EDIT_TOOL_NAME: + return await self._invoke_edit(parameters, query) + if name == GLOB_TOOL_NAME: + return await self._invoke_glob(parameters, query) + if name == GREP_TOOL_NAME: + return await self._invoke_grep(parameters, query) + raise ValueError(f'未找到工具: {name}') + + async def shutdown(self): + pass + + async def _invoke_exec(self, parameters: dict, query: pipeline_query.Query) -> dict: + command = str(parameters['command']) + workdir = 
str(parameters.get('workdir', '/workspace') or '/workspace') + + # Validate that skill references target activated skills. + selected_skill, _ = skill_loader.resolve_virtual_skill_path( + self.ap, + query, + workdir, + include_visible=False, + include_activated=True, + ) + referenced_skill_names = skill_loader.find_referenced_skill_names(command) + + if selected_skill is None and referenced_skill_names: + if len(referenced_skill_names) > 1: + raise ValueError('exec can target at most one activated skill package per call.') + selected_skill = skill_loader.get_activated_skill(query, referenced_skill_names[0]) + if selected_skill is None: + raise ValueError( + f'Skill "{referenced_skill_names[0]}" must be activated before exec can run in its package.' + ) + + if selected_skill is not None: + selected_skill_name = str(selected_skill.get('name', '') or '') + if referenced_skill_names and any(name != selected_skill_name for name in referenced_skill_names): + raise ValueError('exec can reference files from only one activated skill package per call.') + + package_root = str(selected_skill.get('package_root', '') or '').strip() + if not package_root: + raise ValueError(f'Activated skill "{selected_skill_name}" has no package_root.') + + # Wrap command with Python venv bootstrap if the skill has a Python project. + # The venv is created inside the skill's mount path. + skill_mount = f'/workspace/.skills/{selected_skill_name}' + if skill_loader.should_prepare_skill_python_env(package_root): + parameters = dict(parameters) + parameters['command'] = skill_loader.wrap_skill_command_with_python_env(command, mount_path=skill_mount) + + # All exec calls (with or without skills) go through the same container + # via execute_tool. Skills are mounted at /workspace/.skills/{name}/ + # via extra_mounts built by BoxService. 
+ result = await self.ap.box_service.execute_tool(parameters, query) + + if selected_skill is not None: + self._refresh_skill_from_disk(selected_skill) + return result + + def _resolve_host_path( + self, + query: pipeline_query.Query, + sandbox_path: str, + *, + include_visible: bool, + include_activated: bool, + ) -> tuple[str, dict | None]: + selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path( + self.ap, + query, + sandbox_path, + include_visible=include_visible, + include_activated=include_activated, + ) + + box_service = self.ap.box_service + host_root = selected_skill.get('package_root') if selected_skill is not None else box_service.default_workspace + if not host_root: + raise ValueError('No host workspace configured for file operations.') + + mount_path = '/workspace' + if not rewritten_path.startswith(mount_path): + raise ValueError(f'Path must be under {mount_path}.') + + relative = rewritten_path[len(mount_path) :].lstrip('/') + host_path = os.path.realpath(os.path.join(host_root, relative)) + host_root = os.path.realpath(host_root) + + if not (host_path == host_root or host_path.startswith(host_root + os.sep)): + raise ValueError('Path escapes the workspace boundary.') + + return host_path, selected_skill + + def _resolve_skill_relative_path( + self, + query: pipeline_query.Query, + sandbox_path: str, + *, + include_visible: bool, + include_activated: bool, + ) -> tuple[dict, str] | None: + selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path( + self.ap, + query, + sandbox_path, + include_visible=include_visible, + include_activated=include_activated, + ) + if selected_skill is None: + return None + + mount_path = '/workspace' + if not rewritten_path.startswith(mount_path): + raise ValueError(f'Path must be under {mount_path}.') + relative = rewritten_path[len(mount_path) :].lstrip('/') or '.' 
+ return selected_skill, relative + + def _should_use_box_workspace_files(self, selected_skill: dict | None) -> bool: + if selected_skill is not None: + return False + box_service = getattr(self.ap, 'box_service', None) + if box_service is None or not hasattr(box_service, 'execute_tool'): + return False + default_workspace = getattr(box_service, 'default_workspace', None) + return bool(default_workspace and not os.path.isdir(os.path.realpath(default_workspace))) + + async def _run_workspace_file_script(self, script: str, query: pipeline_query.Query) -> dict: + result = await self.ap.box_service.execute_tool( + { + 'command': f"python - <<'PY'\n{script}\nPY", + 'timeout_sec': 30, + }, + query, + ) + if not result.get('ok'): + return {'ok': False, 'error': result.get('stderr') or result.get('stdout') or 'Box execution failed'} + stdout = str(result.get('stdout') or '').strip() + try: + return json.loads(stdout.splitlines()[-1]) + except Exception: + return {'ok': False, 'error': stdout or 'Box file operation returned no result'} + + async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict: + script = f""" +import json, os +path = {json.dumps(path)} +if not path.startswith('/workspace'): + print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}})) +elif not os.path.exists(path): + print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}})) +elif os.path.isdir(path): + print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}})) +else: + with open(path, 'r', encoding='utf-8', errors='replace') as f: + print(json.dumps({{'ok': True, 'content': f.read()}})) +""".strip() + return await self._run_workspace_file_script(script, query) + + async def _write_workspace_via_box(self, path: str, content: str, query: pipeline_query.Query) -> dict: + script = f""" +import json, os +path = {json.dumps(path)} +content = {json.dumps(content)} +if not path.startswith('/workspace'): 
+ print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}})) +else: + os.makedirs(os.path.dirname(path) or '/workspace', exist_ok=True) + with open(path, 'w', encoding='utf-8') as f: + f.write(content) + print(json.dumps({{'ok': True, 'path': path}})) +""".strip() + return await self._run_workspace_file_script(script, query) + + async def _edit_workspace_via_box( + self, + path: str, + old_string: str, + new_string: str, + query: pipeline_query.Query, + ) -> dict: + script = f""" +import json, os +path = {json.dumps(path)} +old_string = {json.dumps(old_string)} +new_string = {json.dumps(new_string)} +if not path.startswith('/workspace'): + print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}})) +elif not os.path.isfile(path): + print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}})) +else: + with open(path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + count = content.count(old_string) + if count == 0: + print(json.dumps({{'ok': False, 'error': 'old_string not found in file.'}})) + elif count > 1: + print(json.dumps({{'ok': False, 'error': f'old_string matches {{count}} locations; provide a more unique string.'}})) + else: + with open(path, 'w', encoding='utf-8') as f: + f.write(content.replace(old_string, new_string, 1)) + print(json.dumps({{'ok': True, 'path': path}})) +""".strip() + return await self._run_workspace_file_script(script, query) + + async def _glob_workspace_via_box(self, path: str, pattern: str, query: pipeline_query.Query) -> dict: + script = f""" +import json, os +from pathlib import Path +path = {json.dumps(path)} +pattern = {json.dumps(pattern)} +skip_dirs = {json.dumps(sorted(_SKIP_DIRS))} +if not path.startswith('/workspace'): + print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}})) +elif not os.path.isdir(path): + print(json.dumps({{'ok': False, 'error': f'Path is not a directory: {{path}}'}})) +else: + base = Path(path) + hits = [ + item 
for item in base.rglob(pattern) + if not any(part in skip_dirs for part in item.parts) + ] + hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True) + shown = hits[:100] + matches = [] + for item in shown: + rel = os.path.relpath(str(item), path) + matches.append(os.path.join(path, rel).replace(os.sep, '/')) + print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}})) +""".strip() + return await self._run_workspace_file_script(script, query) + + async def _grep_workspace_via_box( + self, + path: str, + pattern: str, + include: str | None, + query: pipeline_query.Query, + ) -> dict: + script = f""" +import json, os, re +from pathlib import Path +path = {json.dumps(path)} +pattern = {json.dumps(pattern)} +include = {json.dumps(include)} +skip_dirs = {json.dumps(sorted(_SKIP_DIRS))} +try: + regex = re.compile(pattern) +except re.error as exc: + print(json.dumps({{'ok': False, 'error': f'Invalid regex: {{exc}}'}})) +else: + if not path.startswith('/workspace'): + print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}})) + elif not os.path.exists(path): + print(json.dumps({{'ok': False, 'error': f'Path not found: {{path}}'}})) + else: + base = Path(path) + if base.is_file(): + files = [base] + else: + files = [] + for item in base.rglob(include or '*'): + if any(part in skip_dirs for part in item.parts): + continue + if item.is_file(): + files.append(item) + if len(files) >= 5000: + break + + matches = [] + for fp in files: + try: + text = fp.read_text(errors='ignore') + except OSError: + continue + for lineno, line in enumerate(text.splitlines(), 1): + if regex.search(line): + if base.is_file(): + file_path = path + else: + rel = os.path.relpath(str(fp), path) + file_path = os.path.join(path, rel).replace(os.sep, '/') + matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}}) + if len(matches) >= 200: + break + if len(matches) >= 200: + break + + 
print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}})) +""".strip() + return await self._run_workspace_file_script(script, query) + + async def _invoke_read(self, parameters: dict, query: pipeline_query.Query) -> dict: + path = parameters['path'] + self.ap.logger.info(f'read tool invoked: query_id={query.query_id} path={path}') + skill_request = self._resolve_skill_relative_path( + query, + path, + include_visible=True, + include_activated=True, + ) + if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'): + selected_skill, relative = skill_request + try: + result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative) + return {'ok': True, 'content': result.get('content', '')} + except Exception: + try: + result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative) + entries = [entry['name'] for entry in result.get('entries', [])] + return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True} + except Exception as exc: + return {'ok': False, 'error': str(exc)} + + host_path, selected_skill = self._resolve_host_path( + query, + path, + include_visible=True, + include_activated=True, + ) + if self._should_use_box_workspace_files(selected_skill): + return await self._read_workspace_via_box(path, query) + if not os.path.exists(host_path): + return {'ok': False, 'error': f'File not found: {path}'} + if os.path.isdir(host_path): + entries = os.listdir(host_path) + return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True} + with open(host_path, 'r', errors='replace') as f: + content = f.read() + return {'ok': True, 'content': content} + + async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict: + path = parameters['path'] + content = parameters['content'] + self.ap.logger.info(f'write tool invoked: query_id={query.query_id} path={path} length={len(content)}') + 
skill_request = self._resolve_skill_relative_path( + query, + path, + include_visible=False, + include_activated=True, + ) + if skill_request is not None and hasattr(self.ap.box_service, 'write_skill_file'): + selected_skill, relative = skill_request + await self.ap.box_service.write_skill_file(selected_skill['name'], relative, content) + await self.ap.skill_mgr.reload_skills() + return {'ok': True, 'path': path} + + host_path, selected_skill = self._resolve_host_path( + query, + path, + include_visible=False, + include_activated=True, + ) + if self._should_use_box_workspace_files(selected_skill): + return await self._write_workspace_via_box(path, content, query) + os.makedirs(os.path.dirname(host_path), exist_ok=True) + with open(host_path, 'w', encoding='utf-8') as f: + f.write(content) + self._refresh_skill_from_disk(selected_skill) + return {'ok': True, 'path': path} + + async def _invoke_edit(self, parameters: dict, query: pipeline_query.Query) -> dict: + path = parameters['path'] + old_string = parameters['old_string'] + new_string = parameters['new_string'] + self.ap.logger.info( + f'edit tool invoked: query_id={query.query_id} path={path} ' + f'old_len={len(old_string)} new_len={len(new_string)}' + ) + skill_request = self._resolve_skill_relative_path( + query, + path, + include_visible=False, + include_activated=True, + ) + if ( + skill_request is not None + and hasattr(self.ap.box_service, 'read_skill_file') + and hasattr(self.ap.box_service, 'write_skill_file') + ): + selected_skill, relative = skill_request + try: + result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative) + except Exception: + return {'ok': False, 'error': f'File not found: {path}'} + content = result.get('content', '') + count = content.count(old_string) + if count == 0: + return {'ok': False, 'error': 'old_string not found in file.'} + if count > 1: + return {'ok': False, 'error': f'old_string matches {count} locations; provide a more unique string.'} + 
new_content = content.replace(old_string, new_string, 1) + await self.ap.box_service.write_skill_file(selected_skill['name'], relative, new_content) + await self.ap.skill_mgr.reload_skills() + return {'ok': True, 'path': path} + + host_path, selected_skill = self._resolve_host_path( + query, + path, + include_visible=False, + include_activated=True, + ) + if self._should_use_box_workspace_files(selected_skill): + return await self._edit_workspace_via_box(path, old_string, new_string, query) + if not os.path.isfile(host_path): + return {'ok': False, 'error': f'File not found: {path}'} + with open(host_path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + count = content.count(old_string) + if count == 0: + return {'ok': False, 'error': 'old_string not found in file.'} + if count > 1: + return {'ok': False, 'error': f'old_string matches {count} locations; provide a more unique string.'} + new_content = content.replace(old_string, new_string, 1) + with open(host_path, 'w', encoding='utf-8') as f: + f.write(new_content) + self._refresh_skill_from_disk(selected_skill) + return {'ok': True, 'path': path} + + def _refresh_skill_from_disk(self, selected_skill: dict | None) -> None: + if selected_skill is None: + return + + skill_mgr = getattr(self.ap, 'skill_mgr', None) + if skill_mgr is None: + return + + refresh_skill = getattr(skill_mgr, 'refresh_skill_from_disk', None) + if callable(refresh_skill): + refresh_skill(selected_skill.get('name', '')) + + def _is_sandbox_available(self) -> bool: + """Check if sandbox backend is available. + + This checks the cached backend availability from initialization, + not just whether the box_service process is running. + """ + return bool(self._backend_available) + + def _build_exec_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=EXEC_TOOL_NAME, + human_desc='Execute a command in an isolated environment', + description=( + 'Run shell commands in an isolated execution environment. 
' + 'Use this tool for bash commands, Python execution, and exact calculations over ' + 'user-provided data. Activated skill packages are addressable under ' + '/workspace/.skills/; when running inside one, set workdir to that path. ' + 'To create a new skill package, prepare it under /workspace first, then use register_skill.' + ), + parameters={ + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'Shell command to execute.', + }, + 'workdir': { + 'type': 'string', + 'description': 'Working directory for the command. Defaults to /workspace.', + 'default': '/workspace', + }, + 'timeout_sec': { + 'type': 'integer', + 'description': 'Execution timeout in seconds. Defaults to 30.', + 'default': 30, + 'minimum': 1, + }, + 'env': { + 'type': 'object', + 'description': 'Optional environment variables for the execution.', + 'additionalProperties': {'type': 'string'}, + 'default': {}, + }, + 'description': { + 'type': 'string', + 'description': 'Brief description of what this command does, for logging and audit.', + }, + }, + 'required': ['command'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_read_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=READ_TOOL_NAME, + human_desc='Read a file from the workspace', + description=( + 'Read the contents of a file at the given path under /workspace. ' + 'Visible skill packages can be inspected through /workspace/.skills//... .' 
+ ), + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': 'Absolute path to the file (must be under /workspace).', + }, + }, + 'required': ['path'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_write_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=WRITE_TOOL_NAME, + human_desc='Write a file to the workspace', + description=( + 'Create or overwrite a file at the given path under /workspace with the provided content. ' + 'Activated skill packages can be modified through /workspace/.skills//... . ' + 'For new skills, write files under /workspace and then call register_skill.' + ), + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': 'Absolute path to the file (must be under /workspace).', + }, + 'content': { + 'type': 'string', + 'description': 'Content to write to the file.', + }, + }, + 'required': ['path', 'content'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_edit_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=EDIT_TOOL_NAME, + human_desc='Edit a file in the workspace', + description=( + 'Perform an exact string replacement in a file under /workspace. ' + 'The old_string must appear exactly once in the file. Activated skill packages ' + 'can be edited through /workspace/.skills//... . ' + 'For new skills, edit files under /workspace and then call register_skill.' 
+ ), + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': 'Absolute path to the file (must be under /workspace).', + }, + 'old_string': { + 'type': 'string', + 'description': 'The exact string to find and replace.', + }, + 'new_string': { + 'type': 'string', + 'description': 'The replacement string.', + }, + }, + 'required': ['path', 'old_string', 'new_string'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_glob_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=GLOB_TOOL_NAME, + human_desc='Find files matching a glob pattern', + description=( + 'Find files matching a glob pattern under /workspace. ' + 'Supports ** for recursive matching (e.g. **/*.py). ' + 'Results are sorted by modification time (newest first). ' + 'Visible and activated skill packages can be searched through /workspace/.skills//...' + ), + parameters={ + 'type': 'object', + 'properties': { + 'pattern': { + 'type': 'string', + 'description': 'Glob pattern, e.g. **/*.py or src/**/*.ts', + }, + 'path': { + 'type': 'string', + 'description': 'Directory to search in (must be under /workspace, default: /workspace)', + 'default': '/workspace', + }, + }, + 'required': ['pattern'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_grep_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=GREP_TOOL_NAME, + human_desc='Search file contents with regex', + description=( + 'Search file contents with regex pattern under /workspace. ' + 'Returns matching lines with file path and line number. ' + 'Visible and activated skill packages can be searched through /workspace/.skills//...' 
+ ), + parameters={ + 'type': 'object', + 'properties': { + 'pattern': { + 'type': 'string', + 'description': 'Regex pattern to search for', + }, + 'path': { + 'type': 'string', + 'description': 'File or directory to search (must be under /workspace, default: /workspace)', + 'default': '/workspace', + }, + 'include': { + 'type': 'string', + 'description': 'Only search files matching this glob (e.g. *.py)', + }, + }, + 'required': ['pattern'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + async def _invoke_glob(self, parameters: dict, query: pipeline_query.Query) -> dict: + pattern = parameters['pattern'] + path = str(parameters.get('path', '/workspace') or '/workspace') + self.ap.logger.info(f'glob tool invoked: query_id={query.query_id} pattern={pattern} path={path}') + + host_path, selected_skill = self._resolve_host_path( + query, + path, + include_visible=True, + include_activated=True, + ) + if self._should_use_box_workspace_files(selected_skill): + return await self._glob_workspace_via_box(path, pattern, query) + + if not os.path.isdir(host_path): + return {'ok': False, 'error': f'Path is not a directory: {path}'} + + from pathlib import Path + + base = Path(host_path) + hits = list(base.rglob(pattern)) + + # Filter out skipped directories + hits = [h for h in hits if not any(skip in h.parts for skip in _SKIP_DIRS)] + + # Sort by mtime, newest first + hits.sort(key=lambda p: p.stat().st_mtime if p.exists() else 0, reverse=True) + + total = len(hits) + shown = hits[:100] + + # Convert back to sandbox paths + sandbox_paths = [] + for h in shown: + rel = os.path.relpath(str(h), host_path) + sandbox_path = os.path.join(path, rel) + sandbox_paths.append(sandbox_path) + + result_lines = sandbox_paths + result = '\n'.join(result_lines) + + if total > 100: + result += f'\n... 
({total} matches, showing first 100)' + + return {'ok': True, 'matches': result_lines, 'total': total, 'truncated': total > 100} + + async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict: + pattern = parameters['pattern'] + path = str(parameters.get('path', '/workspace') or '/workspace') + include = parameters.get('include') + self.ap.logger.info(f'grep tool invoked: query_id={query.query_id} pattern={pattern} path={path}') + + import re + from pathlib import Path + + try: + regex = re.compile(pattern) + except re.error as e: + return {'ok': False, 'error': f'Invalid regex: {e}'} + + host_path, selected_skill = self._resolve_host_path( + query, + path, + include_visible=True, + include_activated=True, + ) + if self._should_use_box_workspace_files(selected_skill): + return await self._grep_workspace_via_box(path, pattern, include, query) + + if not os.path.exists(host_path): + return {'ok': False, 'error': f'Path not found: {path}'} + + base = Path(host_path) + + if base.is_file(): + files = [base] + else: + files = self._grep_walk(base, include) + + matches = [] + for fp in files: + try: + text = fp.read_text(errors='ignore') + except OSError: + continue + for lineno, line in enumerate(text.splitlines(), 1): + if regex.search(line): + rel = os.path.relpath(str(fp), host_path) + sandbox_path = os.path.join(path, rel) + matches.append( + { + 'file': sandbox_path, + 'line': lineno, + 'content': line.rstrip(), + } + ) + if len(matches) >= 200: + break + if len(matches) >= 200: + break + + return { + 'ok': True, + 'matches': matches, + 'total': len(matches), + 'truncated': len(matches) >= 200, + } + + @staticmethod + def _grep_walk(root, include: str | None) -> list: + """Walk dir tree for grep, skipping junk dirs.""" + results = [] + for item in root.rglob(include or '*'): + if any(skip in item.parts for skip in _SKIP_DIRS): + continue + if item.is_file(): + results.append(item) + if len(results) >= 5000: + break + return results + + def 
_summarize_parameters(self, parameters: dict) -> dict: + summary = dict(parameters) + cmd = str(summary.get('command', '')).strip() + if len(cmd) > 400: + cmd = f'{cmd[:397]}...' + summary['command'] = cmd + + env = summary.get('env') + if isinstance(env, dict): + summary['env_keys'] = sorted(str(key) for key in env.keys()) + del summary['env'] + + return summary diff --git a/src/langbot/pkg/provider/tools/loaders/skill.py b/src/langbot/pkg/provider/tools/loaders/skill.py new file mode 100644 index 000000000..9df94fd28 --- /dev/null +++ b/src/langbot/pkg/provider/tools/loaders/skill.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import re +import typing + +from ....box import workspace as box_workspace + +if typing.TYPE_CHECKING: + from ....core import app + from langbot_plugin.api.entities.events import pipeline_query + +ACTIVATED_SKILLS_KEY = '_activated_skills' +PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills' +SKILL_MOUNT_PREFIX = '/workspace/.skills' +_SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)') + + +def get_virtual_skill_mount_path(skill_name: str) -> str: + return f'{SKILL_MOUNT_PREFIX}/{skill_name}' + + +def get_bound_skill_names(query: pipeline_query.Query) -> list[str] | None: + if query.variables is None: + return None + + bound_skills = query.variables.get(PIPELINE_BOUND_SKILLS_KEY) + if bound_skills is None: + return None + if isinstance(bound_skills, list): + return [str(item) for item in bound_skills] + return None + + +def get_visible_skills(ap: app.Application, query: pipeline_query.Query) -> dict[str, dict]: + skill_mgr = getattr(ap, 'skill_mgr', None) + if skill_mgr is None: + return {} + + visible_skills = getattr(skill_mgr, 'skills', {}) + bound_skills = get_bound_skill_names(query) + if bound_skills is None: + return visible_skills + + return {skill_name: skill_data for skill_name, skill_data in visible_skills.items() if skill_name in bound_skills} + + +def get_visible_skill(ap: app.Application, 
query: pipeline_query.Query, skill_name: str) -> dict | None: + return get_visible_skills(ap, query).get(skill_name) + + +def get_activated_skills(query: pipeline_query.Query) -> dict[str, dict]: + if query.variables is None: + return {} + + activated = query.variables.get(ACTIVATED_SKILLS_KEY, {}) + if not isinstance(activated, dict): + return {} + return activated + + +def get_activated_skill(query: pipeline_query.Query, skill_name: str) -> dict | None: + return get_activated_skills(query).get(skill_name) + + +def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> None: + if query.variables is None: + query.variables = {} + + activated = query.variables.setdefault(ACTIVATED_SKILLS_KEY, {}) + skill_name = str(skill_data.get('name', '') or '').strip() + if skill_name and skill_name not in activated: + activated[skill_name] = skill_data + + +def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]: + normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace' + if normalized_path == SKILL_MOUNT_PREFIX: + raise ValueError(f'Path must include a skill name under {SKILL_MOUNT_PREFIX}/.') + prefix = f'{SKILL_MOUNT_PREFIX}/' + if not normalized_path.startswith(prefix): + return None, normalized_path + + remainder = normalized_path[len(prefix) :] + skill_name, separator, tail = remainder.partition('/') + if not skill_name: + raise ValueError(f'Path must include a skill name under {SKILL_MOUNT_PREFIX}/.') + + rewritten_path = '/workspace' + if separator: + rewritten_path = f'/workspace/{tail}' + return skill_name, rewritten_path + + +def resolve_virtual_skill_path( + ap: app.Application, + query: pipeline_query.Query, + sandbox_path: str, + *, + include_visible: bool, + include_activated: bool, +) -> tuple[dict | None, str]: + skill_name, rewritten_path = parse_skill_mount_path(sandbox_path) + if skill_name is None: + return None, rewritten_path + + if include_activated: + activated_skill = get_activated_skill(query, 
skill_name) + if activated_skill is not None: + return activated_skill, rewritten_path + + if include_visible: + visible_skill = get_visible_skill(ap, query, skill_name) + if visible_skill is not None: + return visible_skill, rewritten_path + + activated_names = ', '.join(sorted(get_activated_skills(query).keys())) or 'none' + visible_names = ', '.join(sorted(get_visible_skills(ap, query).keys())) or 'none' + raise ValueError( + f'Skill "{skill_name}" is not available at this path. ' + f'Activated skills: {activated_names}. Visible skills: {visible_names}.' + ) + + +def find_referenced_skill_names(text: str) -> list[str]: + if not text: + return [] + + seen: list[str] = [] + for match in _SKILL_MOUNT_PATTERN.findall(text): + if match not in seen: + seen.append(match) + return seen + + +def rewrite_command_for_skill_mount(command: str, skill_name: str) -> str: + virtual_root = get_virtual_skill_mount_path(skill_name) + rewritten = command.replace(f'{virtual_root}/', '/workspace/') + return rewritten.replace(virtual_root, '/workspace') + + +def build_skill_session_id(skill_data: dict, query: pipeline_query.Query) -> str: + skill_identifier = str(skill_data.get('name', 'unknown') or 'unknown') + launcher_type = getattr(query, 'launcher_type', None) + launcher_id = getattr(query, 'launcher_id', None) + query_id = getattr(query, 'query_id', 'unknown') + + if launcher_type is not None and launcher_id is not None: + return f'skill-{launcher_type}_{launcher_id}-{skill_identifier}' + return f'skill-{query_id}-{skill_identifier}' + + +def should_prepare_skill_python_env(package_root: str | None) -> bool: + return box_workspace.should_prepare_python_env(package_root) + + +def wrap_skill_command_with_python_env(command: str, *, mount_path: str = '/workspace') -> str: + return box_workspace.wrap_python_command_with_env(command, mount_path=mount_path).rstrip() diff --git a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py 
b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py new file mode 100644 index 000000000..9d0fe6e9a --- /dev/null +++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py @@ -0,0 +1,304 @@ +from __future__ import annotations + +import os +import typing + +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool + +from .. import loader + +# Align with Claude Code's Skill tool design: +# - activate: Activate a skill via Tool Call, returns SKILL.md content +# - register_skill: Register a skill from sandbox directory to data/skills/ +# - This protects KV Cache and follows industry standard + +ACTIVATE_SKILL_TOOL_NAME = 'activate' +REGISTER_SKILL_TOOL_NAME = 'register_skill' + +SKILL_TOOL_NAMES = { + ACTIVATE_SKILL_TOOL_NAME, + REGISTER_SKILL_TOOL_NAME, +} + + +class SkillToolLoader(loader.ToolLoader): + """Skill tools aligned with Claude Code's design.""" + + def __init__(self, ap): + super().__init__(ap) + self._tools: list[resource_tool.LLMTool] = [] + self._sandbox_available: bool = False + + async def initialize(self): + # Check if sandbox backend is available (same check as native tools) + self._sandbox_available = await self._check_sandbox_available() + if self._sandbox_available: + self._tools = [ + self._build_activate_skill_tool(), + self._build_register_skill_tool(), + ] + else: + self.ap.logger.info( + 'Skill tools (activate/register_skill) are NOT available. ' + 'No sandbox backend (Docker/nsjail/E2B) is ready.' 
+ ) + + async def _check_sandbox_available(self) -> bool: + """Check if the box backend is truly available (not just the runtime).""" + box_service = getattr(self.ap, 'box_service', None) + if box_service is None: + return False + if not getattr(box_service, 'available', False): + return False + # Check if backend is truly available via get_status + try: + status = await box_service.get_status() + backend_info = status.get('backend', {}) + return backend_info.get('available', False) + except Exception: + return False + + async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]: + if not self._is_available(): + return [] + return list(self._tools) + + async def has_tool(self, name: str) -> bool: + return self._is_available() and name in SKILL_TOOL_NAMES + + def _is_available(self) -> bool: + """Check if skill tools should be available. + + Skill tools require both a skill manager and a sandbox backend. + """ + return self._has_skill_manager() and self._sandbox_available + + async def invoke_tool(self, name: str, parameters: dict, query) -> typing.Any: + if name == ACTIVATE_SKILL_TOOL_NAME: + return await self._invoke_activate_skill(parameters, query) + if name == REGISTER_SKILL_TOOL_NAME: + return await self._invoke_register_skill(parameters) + raise ValueError(f'Unknown skill tool: {name}') + + async def shutdown(self): + pass + + def _has_skill_manager(self) -> bool: + return getattr(self.ap, 'skill_mgr', None) is not None + + async def _invoke_activate_skill(self, parameters: dict, query) -> typing.Any: + """Activate a skill and return SKILL.md content via Tool Result.""" + skill_name = str(parameters.get('skill_name', '') or '').strip() + if not skill_name: + raise ValueError('skill_name is required') + + skill_mgr = self.ap.skill_mgr + skill_data = skill_mgr.get_skill_by_name(skill_name) + if skill_data is None: + visible_skills = getattr(skill_mgr, 'skills', {}) + available_names = ', 
'.join(sorted(visible_skills.keys())) or 'none' + raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}') + + # Register activated skill for sandbox mount path resolution + from . import skill as skill_loader + + skill_loader.register_activated_skill(query, skill_data) + + # Return SKILL.md content as Tool Result (injects into context) + instructions = skill_data.get('instructions', '') + package_root = skill_data.get('package_root', '') + mount_path = skill_loader.get_virtual_skill_mount_path(skill_name) + + # Build Tool Result content + result_content = f'The "{skill_name}" skill is activated\n' + result_content += '\n' + result_content += f'{skill_name}\n' + result_content += f'{mount_path}\n' + result_content += f'{package_root}\n' + result_content += f'\n## Instructions\n{instructions}\n' + result_content += '\n## Runtime Context\n' + result_content += f'The skill package is mounted at {mount_path}. Use the standard tools to interact with it:\n' + result_content += f'- Use `read` to inspect files under {mount_path}\n' + result_content += f'- Use `exec` with workdir set to {mount_path} to run commands in that package\n' + result_content += '- Use `write` and `edit` on that path when the instructions require updating files\n' + result_content += '\n' + + return { + 'activated': True, + 'skill_name': skill_name, + 'mount_path': mount_path, + 'content': result_content, + } + + async def _invoke_register_skill(self, parameters: dict) -> typing.Any: + """Register a skill from sandbox directory to data/skills/.""" + sandbox_path = str(parameters.get('path', '') or '').strip() + if not sandbox_path: + raise ValueError('path is required') + + # Resolve sandbox path to host path + host_path = self._resolve_workspace_directory(sandbox_path) + + # Get or create skill service + skill_service = getattr(self.ap, 'skill_service', None) + if skill_service is None: + raise ValueError('Skill service not available') + + # Scan and register the 
skill + scanned = await skill_service.scan_directory_async(host_path) + + # Override name if provided + skill_name = str(parameters.get('name') or scanned['name']).strip() + if not skill_name: + raise ValueError('skill name is required') + + # Create the skill + created = await skill_service.create_skill( + { + 'name': skill_name, + 'display_name': str(parameters.get('display_name') or scanned.get('display_name', '')).strip(), + 'description': str(parameters.get('description') or scanned.get('description', '')).strip(), + 'instructions': str(parameters.get('instructions') or scanned.get('instructions', '')), + 'package_root': host_path, + } + ) + + return { + 'registered': True, + 'skill_name': skill_name, + 'source_path': sandbox_path, + 'skill': created, + } + + def _resolve_workspace_directory(self, sandbox_path: str) -> str: + """Resolve sandbox path to host filesystem path.""" + box_service = getattr(self.ap, 'box_service', None) + workspace_root = getattr(box_service, 'default_workspace', None) + if not workspace_root: + raise ValueError('No default workspace configured') + + normalized_path = str(sandbox_path).strip() or '/workspace' + if not normalized_path.startswith('/workspace'): + raise ValueError('path must be under /workspace') + + relative = normalized_path[len('/workspace') :].lstrip('/') + host_root = os.path.realpath(workspace_root) + host_path = os.path.realpath(os.path.join(host_root, relative)) + + # Security check: ensure path doesn't escape workspace + if not (host_path == host_root or host_path.startswith(host_root + os.sep)): + raise ValueError('path escapes the workspace boundary') + + if getattr(box_service, 'available', False): + return host_path + + if not os.path.isdir(host_path): + raise ValueError(f'Directory does not exist: {sandbox_path}') + + return host_path + + def _build_activate_skill_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=ACTIVATE_SKILL_TOOL_NAME, + human_desc='Activate a skill', + 
description=self._build_activate_tool_description(), + parameters={ + 'type': 'object', + 'properties': { + 'skill_name': { + 'type': 'string', + 'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"', + }, + }, + 'required': ['skill_name'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_register_skill_tool(self) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=REGISTER_SKILL_TOOL_NAME, + human_desc='Register a skill from sandbox', + description=( + "Register a skill package from a directory under /workspace into LangBot's skill store. " + 'Use this after creating or preparing a skill in the sandbox with exec/read/write/edit. ' + 'The directory must contain a SKILL.md file. ' + 'After registration, the skill can be activated with the activate tool.' + ), + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': 'Directory path under /workspace containing the skill package (must have SKILL.md)', + }, + 'name': { + 'type': 'string', + 'description': 'Optional skill name override. Defaults to the name in SKILL.md or directory name.', + }, + 'display_name': { + 'type': 'string', + 'description': 'Optional display name override.', + }, + 'description': { + 'type': 'string', + 'description': 'Optional description override.', + }, + 'instructions': { + 'type': 'string', + 'description': 'Optional instructions override.', + }, + }, + 'required': ['path'], + 'additionalProperties': False, + }, + func=lambda parameters: parameters, + ) + + def _build_activate_tool_description(self) -> str: + """Build tool description with embedded available_skills list.""" + skill_mgr = getattr(self.ap, 'skill_mgr', None) + if skill_mgr is None: + return 'Activate a skill. No skills are currently available.' + + skills = getattr(skill_mgr, 'skills', {}) + if not skills: + return 'Activate a skill. No skills are currently available.' 
+ + # Build section + available_skills_lines = [''] + for skill_name, skill_data in sorted(skills.items()): + description = skill_data.get('description', '') + available_skills_lines.append('') + available_skills_lines.append(f'{skill_name}') + available_skills_lines.append(f'{description}') + available_skills_lines.append('') + available_skills_lines.append('') + + available_skills_block = '\n'.join(available_skills_lines) + + return f"""Activate a skill within the main conversation. + + +When users ask you to perform tasks, check if any of the available skills +below can help complete the task more effectively. Skills provide specialized +capabilities and domain knowledge. + +How to use skills: +- Invoke skills using this tool with the skill name only (no arguments) +- When you invoke a skill, you will see +The skill is activated + +- The skill's instructions will be provided in the tool result +- Examples: + - skill_name: "pdf" - invoke the pdf skill + - skill_name: "data-analysis" - invoke the data-analysis skill + +Important: +- Only use skills listed in below +- Do not invoke a skill that is already running +- To create a new skill: prepare it in /workspace, then use register_skill tool + + +{available_skills_block}""" diff --git a/src/langbot/pkg/provider/tools/toolmgr.py b/src/langbot/pkg/provider/tools/toolmgr.py index f921c094e..5c510fcd8 100644 --- a/src/langbot/pkg/provider/tools/toolmgr.py +++ b/src/langbot/pkg/provider/tools/toolmgr.py @@ -1,15 +1,19 @@ from __future__ import annotations import typing +from typing import TYPE_CHECKING -from ...core import app -from langbot.pkg.utils import importutil -from langbot.pkg.provider.tools import loaders -from langbot.pkg.provider.tools.loaders import mcp as mcp_loader, plugin as plugin_loader import langbot_plugin.api.entities.builtin.resource.tool as resource_tool from langbot_plugin.api.entities.events import pipeline_query -importutil.import_modules_in_pkg(loaders) +if TYPE_CHECKING: + from ...core 
import app + from langbot.pkg.provider.tools.loaders import ( + mcp as mcp_loader, + native as native_loader, + plugin as plugin_loader, + skill_authoring as skill_authoring_loader, + ) class ToolManager: @@ -17,31 +21,53 @@ class ToolManager: ap: app.Application + native_tool_loader: native_loader.NativeToolLoader plugin_tool_loader: plugin_loader.PluginToolLoader mcp_tool_loader: mcp_loader.MCPLoader + skill_tool_loader: skill_authoring_loader.SkillToolLoader def __init__(self, ap: app.Application): self.ap = ap async def initialize(self): + from langbot.pkg.utils import importutil + from langbot.pkg.provider.tools import loaders + from langbot.pkg.provider.tools.loaders import ( + mcp as mcp_loader, + native as native_loader, + plugin as plugin_loader, + skill_authoring as skill_authoring_loader, + ) + + importutil.import_modules_in_pkg(loaders) + + self.native_tool_loader = native_loader.NativeToolLoader(self.ap) + await self.native_tool_loader.initialize() + self.plugin_tool_loader = plugin_loader.PluginToolLoader(self.ap) await self.plugin_tool_loader.initialize() self.mcp_tool_loader = mcp_loader.MCPLoader(self.ap) await self.mcp_tool_loader.initialize() + self.skill_tool_loader = skill_authoring_loader.SkillToolLoader(self.ap) + await self.skill_tool_loader.initialize() async def get_all_tools( - self, bound_plugins: list[str] | None = None, bound_mcp_servers: list[str] | None = None + self, + bound_plugins: list[str] | None = None, + bound_mcp_servers: list[str] | None = None, + include_skill_authoring: bool = False, ) -> list[resource_tool.LLMTool]: - """获取所有函数""" all_functions: list[resource_tool.LLMTool] = [] + all_functions.extend(await self.native_tool_loader.get_tools()) + if include_skill_authoring: + all_functions.extend(await self.skill_tool_loader.get_tools()) all_functions.extend(await self.plugin_tool_loader.get_tools(bound_plugins)) all_functions.extend(await self.mcp_tool_loader.get_tools(bound_mcp_servers)) return all_functions async def 
generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list: - """生成函数列表""" tools = [] for function in use_funcs: @@ -58,28 +84,6 @@ async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool] return tools async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTool]) -> list: - """为anthropic生成函数列表 - - e.g. - - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - """ - tools = [] for function in use_funcs: @@ -93,16 +97,18 @@ async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTo return tools async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any: - """执行函数调用""" - + if await self.native_tool_loader.has_tool(name): + return await self.native_tool_loader.invoke_tool(name, parameters, query) if await self.plugin_tool_loader.has_tool(name): return await self.plugin_tool_loader.invoke_tool(name, parameters, query) - elif await self.mcp_tool_loader.has_tool(name): + if await self.mcp_tool_loader.has_tool(name): return await self.mcp_tool_loader.invoke_tool(name, parameters, query) - else: - raise ValueError(f'未找到工具: {name}') + if await self.skill_tool_loader.has_tool(name): + return await self.skill_tool_loader.invoke_tool(name, parameters, query) + raise ValueError(f'未找到工具: {name}') async def shutdown(self): - """关闭所有工具""" + await self.native_tool_loader.shutdown() await self.plugin_tool_loader.shutdown() await self.mcp_tool_loader.shutdown() + await self.skill_tool_loader.shutdown() diff --git a/src/langbot/pkg/skill/__init__.py b/src/langbot/pkg/skill/__init__.py new file mode 100644 index 000000000..b96f23ca1 --- /dev/null +++ b/src/langbot/pkg/skill/__init__.py @@ 
-0,0 +1,3 @@ +from .manager import SkillManager + +__all__ = ['SkillManager'] diff --git a/src/langbot/pkg/skill/activation.py b/src/langbot/pkg/skill/activation.py new file mode 100644 index 000000000..706747060 --- /dev/null +++ b/src/langbot/pkg/skill/activation.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import typing + +from ..provider.tools.loaders import skill as skill_loader + +if typing.TYPE_CHECKING: + from ..core import app + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + + +# Skill activation is now handled through Tool Call mechanism (activate tool). +# This file is kept for potential future extensions but the text marker +# detection mechanism has been removed. + + +def register_activated_skill( + ap: app.Application, + query: pipeline_query.Query, + skill_name: str, +) -> bool: + """Register an activated skill for sandbox mount path resolution. + + This is called by the activate tool when a skill is activated via Tool Call. + """ + skill_mgr = getattr(ap, 'skill_mgr', None) + if skill_mgr is None: + return False + + skill_data = skill_mgr.get_skill_by_name(skill_name) + if skill_data is None: + return False + + skill_loader.register_activated_skill(query, skill_data) + return True diff --git a/src/langbot/pkg/skill/manager.py b/src/langbot/pkg/skill/manager.py new file mode 100644 index 000000000..a053697f2 --- /dev/null +++ b/src/langbot/pkg/skill/manager.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +import os +import typing + +from ..core import app + +if typing.TYPE_CHECKING: + pass + + +class SkillManager: + """Skill manager backed by Box-managed or local filesystem packages. + + In sandbox deployments, skills are loaded from the Box runtime. Local + data/skills remains as the fallback for non-Box development. + + Skills are activated through the `activate` tool (Tool Call mechanism), + aligned with Claude Code's design. This protects KV Cache and follows + industry standard. 
+ """ + + ap: app.Application + skills: dict[str, dict] + + def __init__(self, ap: app.Application): + self.ap = ap + self.skills = {} + + async def initialize(self): + await self.reload_skills() + + async def reload_skills(self): + """Reload all skills from the Box runtime. + + Box is the only source of truth for skills. When Box is unavailable + (disabled in config or unreachable) the cache is emptied — there is + no local filesystem fallback. Skills whose ``package_root`` is no + longer visible on the LangBot-side filesystem are dropped so they + don't surface as stale ``extra_mounts``. + """ + self.skills = {} + + box_service = getattr(self.ap, 'box_service', None) + if box_service is None or not getattr(box_service, 'available', False): + self.ap.logger.info('Box runtime unavailable; skill cache is empty.') + return + + try: + dropped = 0 + for skill_data in await box_service.list_skills(): + skill_name = skill_data.get('name') + if not skill_name: + continue + package_root = str(skill_data.get('package_root', '') or '').strip() + if package_root and not os.path.isdir(package_root): + self.ap.logger.warning( + f'Skill "{skill_name}" reported by Box runtime but ' + f'package_root missing on LangBot filesystem ' + f'({package_root}); dropping from in-memory cache.' + ) + dropped += 1 + continue + self.skills[skill_name] = skill_data + if dropped: + self.ap.logger.warning( + f'Loaded {len(self.skills)} skills from Box runtime ' + f'({dropped} dropped due to missing package_root).' + ) + else: + self.ap.logger.info(f'Loaded {len(self.skills)} skills from Box runtime') + except Exception as exc: + self.ap.logger.warning(f'Failed to load skills from Box runtime: {exc}') + + def refresh_skill_from_disk(self, skill_name: str) -> bool: + """Confirm a single skill is present in the cache. 
+ + With Box as the only source of truth, the actual reload is driven by + SkillService callers awaiting ``reload_skills``; this method only + reports whether the cache still has the skill. + """ + if not skill_name: + return False + return skill_name in self.skills + + def get_skill_by_name(self, name: str) -> dict | None: + """Get skill data by name.""" + return self.skills.get(name) + + def get_skill_index(self, bound_skills: list[str] | None = None) -> str: + """Render the pipeline-visible skills as a short ``name: description`` + index suitable for the system prompt. + + ``bound_skills`` follows the same convention as + ``query.variables['_pipeline_bound_skills']``: ``None`` means every + loaded skill is exposed; an explicit list filters to that subset. + Returns an empty string when no skills are visible. + """ + lines: list[str] = [] + for skill in self.skills.values(): + name = skill.get('name') + if not name: + continue + if bound_skills is not None and name not in bound_skills: + continue + display = skill.get('display_name') or name + description = (skill.get('description') or '').strip().replace('\n', ' ') + lines.append(f'- {name} ({display}): {description}') + + if not lines: + return '' + return 'Available Skills:\n' + '\n'.join(lines) + + def build_skill_aware_prompt_addition(self, bound_skills: list[str] | None = None) -> str: + """Build the system-prompt addendum that makes the LLM aware of the + pipeline-visible skills. + + Only metadata (name + description) is injected — the full SKILL.md is + loaded later via the ``activate`` Tool Call, protecting KV cache and + matching Claude Code's progressive disclosure pattern. Returns an + empty string when no skills are visible (no prompt change at all). 
+ """ + skill_index = self.get_skill_index(bound_skills) + if not skill_index: + return '' + return ( + '\n\n' + f'{skill_index}\n\n' + "When the user's request clearly matches one or more skills " + 'based on their descriptions above, call the `activate` tool with ' + 'the skill name to load its full instructions. Only the name and ' + 'description are visible here; the actual instructions arrive as ' + 'the tool result. If no skill is a clear match, respond normally ' + 'without activating any skill.' + ) diff --git a/src/langbot/pkg/skill/utils.py b/src/langbot/pkg/skill/utils.py new file mode 100644 index 000000000..fc143362f --- /dev/null +++ b/src/langbot/pkg/skill/utils.py @@ -0,0 +1,37 @@ +"""Shared utilities for skill file parsing.""" + +import yaml + + +def parse_frontmatter(content: str) -> tuple[dict, str]: + """Parse YAML frontmatter from markdown content. + + Expects format: + --- + name: my-skill + description: Does something + --- + # Actual instructions... + + Returns: + Tuple of (metadata dict, remaining content) + """ + if not content.startswith('---'): + return {}, content + + parts = content.split('---', 2) + if len(parts) < 3: + return {}, content + + frontmatter_str = parts[1].strip() + instructions = parts[2].strip() + + try: + metadata = yaml.safe_load(frontmatter_str) or {} + except yaml.YAMLError: + metadata = {} + + if not isinstance(metadata, dict): + metadata = {} + + return metadata, instructions diff --git a/src/langbot/pkg/utils/managed_runtime.py b/src/langbot/pkg/utils/managed_runtime.py new file mode 100644 index 000000000..77f59be4c --- /dev/null +++ b/src/langbot/pkg/utils/managed_runtime.py @@ -0,0 +1,88 @@ +"""Base class for connectors that may manage a local runtime subprocess.""" + +from __future__ import annotations + +import asyncio +import os +import sys +from typing import TYPE_CHECKING, Awaitable, Callable + +if TYPE_CHECKING: + from ..core import app as core_app + + +class ManagedRuntimeConnector: + """Base class for 
connectors that may manage a local runtime subprocess. + + Provides shared lifecycle helpers: subprocess launch, health-check retry, + and graceful termination. Concrete connectors (plugin, box, …) inherit + this and add their own protocol-specific logic. + """ + + ap: 'core_app.Application' + runtime_subprocess: asyncio.subprocess.Process | None + runtime_subprocess_task: asyncio.Task | None + + def __init__(self, ap: 'core_app.Application'): + self.ap = ap + self.runtime_subprocess = None + self.runtime_subprocess_task = None + + async def _start_runtime_subprocess(self, *args: str) -> None: + """Launch a local runtime as a subprocess of the current Python interpreter. + + If a subprocess is already running (no *returncode* yet), this is a no-op. + """ + if self.runtime_subprocess is not None and self.runtime_subprocess.returncode is None: + return + + python_path = sys.executable + env = os.environ.copy() + self.runtime_subprocess = await asyncio.create_subprocess_exec( + python_path, + *args, + env=env, + ) + self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait()) + + async def _wait_until_ready( + self, + check: Callable[[], Awaitable[None]], + retries: int = 40, + interval: float = 0.25, + runtime_name: str = 'runtime', + ) -> None: + """Repeatedly call *check* until it succeeds or retries are exhausted. + + Between attempts the method sleeps for *interval* seconds. If the + managed subprocess exits before readiness is confirmed, a + ``RuntimeError`` is raised immediately. + """ + last_exc: Exception | None = None + for _ in range(retries): + # Fast-fail if the process already died. 
+ if self.runtime_subprocess is not None and self.runtime_subprocess.returncode is not None: + raise RuntimeError( + f'local {runtime_name} exited before becoming ready (code {self.runtime_subprocess.returncode})' + ) + + try: + await check() + return + except Exception as exc: + last_exc = exc + await asyncio.sleep(interval) + + if last_exc is not None: + raise last_exc + raise RuntimeError(f'local {runtime_name} did not become ready') + + def _dispose_subprocess(self) -> None: + """Terminate the managed subprocess and cancel its wait task.""" + if self.runtime_subprocess is not None and self.runtime_subprocess.returncode is None: + self.ap.logger.info('Terminating managed runtime process...') + self.runtime_subprocess.terminate() + + if self.runtime_subprocess_task is not None: + self.runtime_subprocess_task.cancel() + self.runtime_subprocess_task = None diff --git a/src/langbot/pkg/utils/paths.py b/src/langbot/pkg/utils/paths.py index fd052c507..6f95ec82b 100644 --- a/src/langbot/pkg/utils/paths.py +++ b/src/langbot/pkg/utils/paths.py @@ -1,37 +1,70 @@ -"""Utility functions for finding package resources""" +"""Utility functions for finding package resources and runtime data roots.""" import os from pathlib import Path _is_source_install = None +_source_root = None + + +def _find_source_root() -> Path | None: + """Locate the LangBot repository root when running from source.""" + global _source_root + + if _source_root is not None: + return _source_root + + current = Path(__file__).resolve() + for parent in current.parents: + if (parent / 'pyproject.toml').exists() and (parent / 'main.py').exists(): + _source_root = parent + return parent + + _source_root = None + return None def _check_if_source_install() -> bool: """ - Check if we're running from source directory or an installed package. - Cached to avoid repeated file I/O. + Check if we're running from the LangBot source tree. + Cached to avoid repeated filesystem scans. 
""" global _is_source_install if _is_source_install is not None: return _is_source_install - # Check if main.py exists in current directory with LangBot marker - if os.path.exists('main.py'): - try: - with open('main.py', 'r', encoding='utf-8') as f: - # Only read first 500 chars to check for marker - content = f.read(500) - if 'LangBot/main.py' in content: - _is_source_install = True - return True - except (IOError, OSError, UnicodeDecodeError): - # If we can't read the file, assume not a source install - pass + _is_source_install = _find_source_root() is not None + return _is_source_install + + +def get_data_root() -> str: + """ + Get the runtime data root. + + Priority: + 1. LANGBOT_DATA_ROOT environment override + 2. Source checkout root /data when running from source + 3. Current working directory /data for installed-package usage + """ + env_root = os.environ.get('LANGBOT_DATA_ROOT', '').strip() + if env_root: + return str(Path(env_root).expanduser().resolve()) + + source_root = _find_source_root() + if source_root is not None: + return str((source_root / 'data').resolve()) - _is_source_install = False - return False + return str((Path.cwd() / 'data').resolve()) + + +def get_data_path(*parts: str) -> str: + """Join path segments under the resolved data root.""" + data_root = Path(get_data_root()) + if not parts: + return str(data_root) + return str((data_root.joinpath(*parts)).resolve()) def get_frontend_path() -> str: @@ -76,8 +109,11 @@ def get_resource_path(resource: str) -> str: Absolute path to the resource """ # First, check if resource exists in current directory (source install) - if _check_if_source_install() and os.path.exists(resource): - return resource + source_root = _find_source_root() + if source_root is not None: + source_resource = source_root / resource + if source_resource.exists(): + return str(source_resource) # Second, check current directory anyway if os.path.exists(resource): diff --git a/src/langbot/pkg/utils/platform.py 
b/src/langbot/pkg/utils/platform.py index b3f7a6df9..9badb42ee 100644 --- a/src/langbot/pkg/utils/platform.py +++ b/src/langbot/pkg/utils/platform.py @@ -16,7 +16,14 @@ def get_platform() -> str: standalone_runtime = False +standalone_box = False + def use_websocket_to_connect_plugin_runtime() -> bool: """是否使用 websocket 连接插件运行时""" return standalone_runtime + + +def use_websocket_to_connect_box_runtime() -> bool: + """Whether to use WebSocket to connect to an external box runtime.""" + return standalone_box diff --git a/src/langbot/pkg/utils/version.py b/src/langbot/pkg/utils/version.py index 23440e4a9..1e19420db 100644 --- a/src/langbot/pkg/utils/version.py +++ b/src/langbot/pkg/utils/version.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import typing import logging @@ -11,7 +10,7 @@ class VersionManager: - """版本管理器""" + """Version manager""" ap: app.Application @@ -22,190 +21,68 @@ async def initialize(self): pass def get_current_version(self) -> str: - current_tag = constants.semantic_version - - return current_tag + return constants.semantic_version async def get_release_list(self) -> list: - """获取发行列表""" + """Fetch release list from Space API (cached GitHub releases).""" try: rls_list_resp = requests.get( - url='https://api.github.com/repos/langbot-app/LangBot/releases', + url='https://space.langbot.app/api/v1/dist/info/releases', proxies=self.ap.proxy_mgr.get_forward_proxies(), - timeout=5, + timeout=10, ) - rls_list_resp.raise_for_status() # 检查请求是否成功 - rls_list = rls_list_resp.json() - return rls_list + rls_list_resp.raise_for_status() + resp_json = rls_list_resp.json() + if resp_json.get('code') == 0 and isinstance(resp_json.get('data'), list): + return resp_json['data'] + self.ap.logger.warning(f'Failed to fetch release list: unexpected response: {resp_json.get("msg", "")}') + return [] except Exception as e: - self.ap.logger.warning(f'获取发行列表失败: {e}') - pass + self.ap.logger.warning(f'Failed to fetch release list: {e}') return [] - async 
def update_all(self): - """检查更新并下载源码""" - - current_tag = self.get_current_version() - - rls_list = await self.get_release_list() - - latest_rls = {} - rls_notes = [] - latest_tag_name = '' - for rls in rls_list: - rls_notes.append(rls['name']) # 使用发行名称作为note - if latest_tag_name == '': - latest_tag_name = rls['tag_name'] - - if rls['tag_name'] == current_tag: - break - - if latest_rls == {}: - latest_rls = rls - self.ap.logger.info('更新日志: {}'.format(rls_notes)) - - if latest_rls == {} and not self.is_newer(latest_tag_name, current_tag): # 没有新版本 - return False - - # 下载最新版本的zip到temp目录 - self.ap.logger.info('开始下载最新版本: {}'.format(latest_rls['zipball_url'])) - - zip_url = latest_rls['zipball_url'] - zip_resp = requests.get(url=zip_url, proxies=self.ap.proxy_mgr.get_forward_proxies()) - zip_data = zip_resp.content - - # 检查temp/updater目录 - if not os.path.exists('temp'): - os.mkdir('temp') - if not os.path.exists('temp/updater'): - os.mkdir('temp/updater') - with open('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'wb') as f: - f.write(zip_data) - - self.ap.logger.info('下载最新版本完成: {}'.format('temp/updater/{}.zip'.format(latest_rls['tag_name']))) - - # 解压zip到temp/updater// - import zipfile - - # 检查目标文件夹 - if os.path.exists('temp/updater/{}'.format(latest_rls['tag_name'])): - import shutil - - shutil.rmtree('temp/updater/{}'.format(latest_rls['tag_name'])) - os.mkdir('temp/updater/{}'.format(latest_rls['tag_name'])) - with zipfile.ZipFile('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'r') as zip_ref: - zip_ref.extractall('temp/updater/{}'.format(latest_rls['tag_name'])) - - # 覆盖源码 - source_root = '' - # 找到temp/updater//中的第一个子目录路径 - for root, dirs, files in os.walk('temp/updater/{}'.format(latest_rls['tag_name'])): - if root != 'temp/updater/{}'.format(latest_rls['tag_name']): - source_root = root - break - - # 覆盖源码 - import shutil - - for root, dirs, files in os.walk(source_root): - # 覆盖所有子文件子目录 - for file in files: - src = os.path.join(root, file) - dst = 
src.replace(source_root, '.') - if os.path.exists(dst): - os.remove(dst) - - # 检查目标文件夹是否存在 - if not os.path.exists(os.path.dirname(dst)): - os.makedirs(os.path.dirname(dst)) - # 检查目标文件是否存在 - if not os.path.exists(dst): - # 创建目标文件 - open(dst, 'w').close() - - shutil.copy(src, dst) - - # 把current_tag写入文件 - current_tag = latest_rls['tag_name'] - with open('current_tag', 'w') as f: - f.write(current_tag) - - # TODO statistics - async def is_new_version_available(self) -> bool: - """检查是否有新版本""" - # 从github获取release列表 + """Check whether a newer version is available.""" rls_list = await self.get_release_list() - if rls_list is None: + if not rls_list: return False - # 获取当前版本 current_tag = self.get_current_version() - # 检查是否有新版本 latest_tag_name = '' for rls in rls_list: - if latest_tag_name == '': - latest_tag_name = rls['tag_name'] - break + latest_tag_name = rls.get('tag_name', '') + break + + return self._is_newer(latest_tag_name, current_tag) - return self.is_newer(latest_tag_name, current_tag) + def _is_newer(self, new_tag: str, old_tag: str) -> bool: + """Check if new_tag is a newer version than old_tag. - def is_newer(self, new_tag: str, old_tag: str): - """判断版本是否更新,忽略第四位版本和第一位版本""" - if new_tag == old_tag: + Compares the first three segments (major.minor.patch) only. + Returns False if the major version differs (breaking change boundary). 
+ """ + if not new_tag or not old_tag or new_tag == old_tag: return False - new_tag = new_tag.split('.') - old_tag = old_tag.split('.') + new_parts = new_tag.split('.') + old_parts = old_tag.split('.') - # 判断主版本是否相同 - if new_tag[0] != old_tag[0]: + # Different major version — not considered an upgrade + if new_parts[0] != old_parts[0]: return False - if len(new_tag) < 4: + if len(new_parts) < 4: return True - # 合成前三段,判断是否相同 - new_tag = '.'.join(new_tag[:3]) - old_tag = '.'.join(old_tag[:3]) - - return new_tag != old_tag - - def compare_version_str(v0: str, v1: str) -> int: - """比较两个版本号""" - - # 删除版本号前的v - if v0.startswith('v'): - v0 = v0[1:] - if v1.startswith('v'): - v1 = v1[1:] - - v0: list = v0.split('.') - v1: list = v1.split('.') - - # 如果两个版本号节数不同,把短的后面用0补齐 - if len(v0) < len(v1): - v0.extend(['0'] * (len(v1) - len(v0))) - elif len(v0) > len(v1): - v1.extend(['0'] * (len(v0) - len(v1))) - - # 从高位向低位比较 - for i in range(len(v0)): - if int(v0[i]) > int(v1[i]): - return 1 - elif int(v0[i]) < int(v1[i]): - return -1 - - return 0 + return '.'.join(new_parts[:3]) != '.'.join(old_parts[:3]) async def show_version_update(self) -> typing.Tuple[str, int]: try: - if await self.ap.ver_mgr.is_new_version_available(): + if await self.is_new_version_available(): return ( - 'New version available:\n有新版本可用,根据文档更新: \nhttps://link.langbot.app/zh/docs/update', + 'New version available. Update guide: https://link.langbot.app/en/docs/update', logging.INFO, ) - except Exception as e: return f'Error checking version update: {e}', logging.WARNING diff --git a/src/langbot/templates/config.yaml b/src/langbot/templates/config.yaml index 5cf9b98b8..753b59d8b 100644 --- a/src/langbot/templates/config.yaml +++ b/src/langbot/templates/config.yaml @@ -104,6 +104,31 @@ monitoring: check_interval_hours: 1 # Number of expired rows to delete per table batch delete_batch_size: 1000 +box: + # Master switch for the Box sandbox runtime. 
When false, LangBot does NOT + # attempt to connect to a remote Box runtime nor start a local stdio Box + # subprocess. Disabling Box also disables every feature that depends on it: + # the native sandbox tools (exec/read/write/edit/glob/grep), the activate + # skill tool, skill add/edit, and stdio-mode MCP servers. Skills can still + # be listed read-only and http/sse MCP servers continue to work. + enabled: true + backend: 'local' # 'local' (Docker/nsjail), 'docker', 'nsjail', or 'e2b'. Can be written via BOX__BACKEND. + runtime: + endpoint: '' # External Box Runtime base URL, e.g. 'ws://127.0.0.1:5410'. Leave empty for local auto-managed runtime. + local: + profile: 'default' + image: '' # Custom local sandbox image. Leave empty to use the profile default. + host_root: './data/box' # Base host directory for local workspace mounts. Docker deployments should override this with an absolute host path. + default_workspace: '' # Defaults to '/default'. Relative paths are resolved under host_root. + skills_root: 'skills' # Box-owned skill package directory. Relative paths are resolved under host_root. + allowed_mount_roots: # Defaults to [''] when left empty. + - './data/box' + - '/tmp' + workspace_quota_mb: null # Optional disk quota override (>= 0). null = profile default. + e2b: + api_key: '' # Can also be set via E2B_API_KEY env var. + api_url: '' # Custom API URL for self-hosted deployments. + template: '' # Default template ID (e.g. 'base', 'python-3.11'). space: # Space service URL for OAuth and API url: 'https://space.langbot.app' diff --git a/src/langbot/templates/default-pipeline-config.json b/src/langbot/templates/default-pipeline-config.json index fe6e28427..78e2ec958 100644 --- a/src/langbot/templates/default-pipeline-config.json +++ b/src/langbot/templates/default-pipeline-config.json @@ -50,10 +50,11 @@ "prompt": [ { "role": "system", - "content": "You are a helpful assistant." + "content": "You are a helpful assistant. 
When tools are available, use them for exact calculations, data processing, and code execution instead of guessing. Unless the user explicitly asks for code or a script, return the result directly instead of printing the generated code." } ], "knowledge-bases": [], + "box-session-id-template": "{launcher_type}_{launcher_id}", "rerank-model": "", "rerank-top-k": 5 }, diff --git a/src/langbot/templates/metadata/pipeline/ai.yaml b/src/langbot/templates/metadata/pipeline/ai.yaml index fd68fb475..32f4115f1 100644 --- a/src/langbot/templates/metadata/pipeline/ai.yaml +++ b/src/langbot/templates/metadata/pipeline/ai.yaml @@ -124,6 +124,99 @@ stages: field: __system.is_wizard operator: neq value: true + - name: box-session-id-template + label: + en_US: Sandbox Scope + zh_Hans: 沙箱作用域 + zh_Hant: 沙箱作用域 + ja_JP: サンドボックススコープ + vi_VN: Phạm vi Sandbox + th_TH: ขอบเขต Sandbox + es_ES: Alcance del Sandbox + ru_RU: Область песочницы + description: + en_US: Determines how sandbox environments are shared across messages. + zh_Hans: 决定沙箱环境在不同消息间的共享方式。 + zh_Hant: 決定沙箱環境在不同訊息間的共享方式。 + ja_JP: メッセージ間でサンドボックス環境を共有する方法を決定します。 + vi_VN: Xác định cách chia sẻ môi trường sandbox giữa các tin nhắn. + th_TH: กำหนดวิธีแชร์สภาพแวดล้อม Sandbox ระหว่างข้อความ + es_ES: Determina cómo se comparten los entornos sandbox entre mensajes. + ru_RU: Определяет, как песочницы используются совместно между сообщениями. + disable_if: + field: __system.box_available + operator: eq + value: false + disabled_tooltip: + en_US: >- + Box sandbox is disabled or unavailable. Enable it in config.yaml + (box.enabled = true) and ensure the runtime is reachable to change + this setting. + zh_Hans: Box 沙箱已禁用或不可用。请在配置中启用(box.enabled = true)并确认运行时连接正常,才能修改此项。 + zh_Hant: Box 沙箱已停用或無法使用。請在設定中啟用(box.enabled = true)並確認執行時連線正常,才能修改此項。 + ja_JP: Box サンドボックスが無効または利用できません。設定で有効化(box.enabled = true)し、ランタイムが接続できることを確認してから変更してください。 + vi_VN: Sandbox Box đã tắt hoặc không khả dụng. 
Hãy bật trong cấu hình (box.enabled = true) và đảm bảo runtime hoạt động để chỉnh sửa. + th_TH: Sandbox Box ถูกปิดใช้งานหรือไม่พร้อมใช้งาน กรุณาเปิดใช้งานในการตั้งค่า (box.enabled = true) และตรวจสอบว่ารันไทม์เชื่อมต่อปกติก่อนปรับค่า + es_ES: El sandbox de Box está desactivado o no disponible. Actívelo en la configuración (box.enabled = true) y asegúrese de que el runtime esté conectado para modificar este ajuste. + ru_RU: Песочница Box отключена или недоступна. Включите её в конфигурации (box.enabled = true) и убедитесь, что среда выполнения работает, чтобы изменить эту настройку. + type: select + required: false + default: "{launcher_type}_{launcher_id}" + options: + - name: "{global}" + label: + en_US: Global (shared by all) + zh_Hans: 全局(所有人共享) + zh_Hant: 全域(所有人共用) + ja_JP: グローバル(全員共有) + vi_VN: Toàn cục (chia sẻ cho tất cả) + th_TH: ทั่วไป (แชร์ทั้งหมด) + es_ES: Global (compartido por todos) + ru_RU: Глобальный (общий для всех) + - name: "{launcher_type}_{launcher_id}" + label: + en_US: Per chat (Recommended) + zh_Hans: 每个会话(推荐) + zh_Hant: 每個會話(推薦) + ja_JP: チャットごと(推奨) + vi_VN: Mỗi cuộc trò chuyện (Khuyến nghị) + th_TH: ต่อแชท (แนะนำ) + es_ES: Por chat (Recomendado) + ru_RU: По чату (Рекомендуется) + - name: "{launcher_type}_{launcher_id}_{sender_id}" + label: + en_US: Per user in chat + zh_Hans: 会话中每个用户 + zh_Hant: 會話中每個用戶 + ja_JP: チャット内のユーザーごと + vi_VN: Mỗi người dùng trong cuộc trò chuyện + th_TH: ต่อผู้ใช้ในแชท + es_ES: Por usuario en chat + ru_RU: По пользователю в чате + - name: "{launcher_type}_{launcher_id}_{conversation_id}" + label: + en_US: Per conversation context + zh_Hans: 每个对话上下文 + zh_Hant: 每個對話上下文 + ja_JP: 会話コンテキストごと + vi_VN: Mỗi ngữ cảnh hội thoại + th_TH: ต่อบริบทการสนทนา + es_ES: Por contexto de conversación + ru_RU: По контексту разговора + - name: "{query_id}" + label: + en_US: Per message (isolated) + zh_Hans: 每条消息(完全隔离) + zh_Hant: 每條訊息(完全隔離) + ja_JP: メッセージごと(隔離) + vi_VN: Mỗi tin nhắn (cách ly) + th_TH: ต่อข้อความ (แยกส่วน) + es_ES: Por 
mensaje (aislado) + ru_RU: По сообщению (изолированно) + show_if: + field: __system.is_wizard + operator: neq + value: true - name: rerank-model label: en_US: Rerank Model diff --git a/test-embed.html b/test-embed.html new file mode 100644 index 000000000..84231924a --- /dev/null +++ b/test-embed.html @@ -0,0 +1,21 @@ + + + + + LangBot Embed Widget Test + + + +

LangBot Embed Widget Test Page

+

If the widget loaded correctly, you should see a blue chat bubble in the bottom-right corner.

+

Replace the BOT_UUID below with your actual bot UUID.

+ + + + + diff --git a/tests/factories/app.py b/tests/factories/app.py index 5f36df846..d1edf56a2 100644 --- a/tests/factories/app.py +++ b/tests/factories/app.py @@ -15,7 +15,7 @@ class FakeApp: def __init__( self, *, - command_prefix: list[str] = ["/", "!"], + command_prefix: list[str] = ['/', '!'], command_enable: bool = True, pipeline_concurrency: int = 10, admins: list[str] | None = None, @@ -40,6 +40,8 @@ def __init__( self.telemetry = self._create_mock_telemetry() self.survey = None self.cmd_mgr = self._create_mock_cmd_mgr() + self.skill_mgr = self._create_mock_skill_mgr() + self.pipeline_service = self._create_mock_pipeline_service() # Apply any extra attributes for specific test scenarios for name, value in extra_attrs.items(): @@ -98,9 +100,9 @@ def _create_mock_instance_config( ): instance_config = Mock() instance_config.data = { - "command": {"prefix": command_prefix, "enable": command_enable}, - "concurrency": {"pipeline": pipeline_concurrency}, - "admins": admins, + 'command': {'prefix': command_prefix, 'enable': command_enable}, + 'concurrency': {'pipeline': pipeline_concurrency}, + 'admins': admins, } return instance_config @@ -119,6 +121,20 @@ def _create_mock_cmd_mgr(self): cmd_mgr.execute = AsyncMock() return cmd_mgr + def _create_mock_skill_mgr(self): + """Mock SkillManager that returns no skill index addition by default.""" + skill_mgr = Mock() + skill_mgr.skills = {} + skill_mgr.build_skill_aware_prompt_addition = Mock(return_value='') + skill_mgr.get_skill_index = Mock(return_value=[]) + return skill_mgr + + def _create_mock_pipeline_service(self): + """Mock PipelineService.get_pipeline returning empty extensions prefs.""" + pipeline_service = AsyncMock() + pipeline_service.get_pipeline = AsyncMock(return_value={'extensions_preferences': {}}) + return pipeline_service + def capture_message(self, message): """Capture an outbound message for test assertions.""" self._outbound_messages.append(message) @@ -134,4 +150,4 @@ def 
clear_outbound_messages(self): def fake_app(**kwargs) -> FakeApp: """Create a FakeApp instance with optional overrides.""" - return FakeApp(**kwargs) \ No newline at end of file + return FakeApp(**kwargs) diff --git a/tests/integration/api/test_pipelines.py b/tests/integration/api/test_pipelines.py index 502b12c2e..50ac37bc5 100644 --- a/tests/integration/api/test_pipelines.py +++ b/tests/integration/api/test_pipelines.py @@ -20,6 +20,7 @@ # ============== FIXTURE FOR SYS.MODULES ISOLATION ============== + @pytest.fixture(scope='module') def mock_circular_import_chain(): """Break circular import chain for API controller.""" @@ -53,21 +54,25 @@ class FakeMinimalApplication: ): # Import groups after mocking to populate preregistered_groups import langbot.pkg.api.http.controller.groups.pipelines.pipelines as _pipelines # noqa: E402, F401 + yield # ============== FAKE APPLICATION WITH PIPELINE SERVICES ============== + @pytest.fixture(scope='module') def fake_pipeline_app(): """Create FakeApp with pipeline-specific services (module scope for reuse).""" app = FakeApp() # Pipeline config - app.instance_config.data.update({ - 'api': {'port': 5300}, - 'system': {'allow_modify_login_info': True, 'limitation': {}}, - }) + app.instance_config.data.update( + { + 'api': {'port': 5300}, + 'system': {'allow_modify_login_info': True, 'limitation': {}}, + } + ) # Auth services app.user_service = Mock() @@ -79,25 +84,31 @@ def fake_pipeline_app(): # Pipeline service app.pipeline_service = Mock() - app.pipeline_service.get_pipeline_metadata = AsyncMock(return_value=[ - {'name': 'trigger', 'stages': []}, - {'name': 'ai', 'stages': []}, - ]) - app.pipeline_service.get_pipelines = AsyncMock(return_value=[ - { + app.pipeline_service.get_pipeline_metadata = AsyncMock( + return_value=[ + {'name': 'trigger', 'stages': []}, + {'name': 'ai', 'stages': []}, + ] + ) + app.pipeline_service.get_pipelines = AsyncMock( + return_value=[ + { + 'uuid': 'test-pipeline-uuid', + 'name': 'Test Pipeline', 
+ 'description': 'Test description', + 'created_at': '2024-01-01T00:00:00', + 'updated_at': '2024-01-01T00:00:00', + 'is_default': False, + } + ] + ) + app.pipeline_service.get_pipeline = AsyncMock( + return_value={ 'uuid': 'test-pipeline-uuid', 'name': 'Test Pipeline', - 'description': 'Test description', - 'created_at': '2024-01-01T00:00:00', - 'updated_at': '2024-01-01T00:00:00', - 'is_default': False, + 'config': {}, } - ]) - app.pipeline_service.get_pipeline = AsyncMock(return_value={ - 'uuid': 'test-pipeline-uuid', - 'name': 'Test Pipeline', - 'config': {}, - }) + ) app.pipeline_service.create_pipeline = AsyncMock(return_value={'uuid': 'new-pipeline-uuid'}) app.pipeline_service.update_pipeline = AsyncMock(return_value={}) app.pipeline_service.delete_pipeline = AsyncMock() @@ -112,6 +123,10 @@ def fake_pipeline_app(): app.mcp_service = Mock() app.mcp_service.get_mcp_servers = AsyncMock(return_value=[]) + # Skill service (for extensions endpoint) + app.skill_service = Mock() + app.skill_service.list_skills = AsyncMock(return_value=[]) + # Plugin connector (for extensions endpoint) app.plugin_connector.list_plugins = AsyncMock(return_value=[]) @@ -130,6 +145,7 @@ async def quart_test_client(fake_pipeline_app, http_controller_cls): # ============== PIPELINE ENDPOINT TESTS ============== + @pytest.mark.usefixtures('mock_circular_import_chain') class TestPipelineMetadataEndpoint: """Tests for /api/v1/pipelines/_/metadata endpoint.""" @@ -138,8 +154,7 @@ class TestPipelineMetadataEndpoint: async def test_get_pipeline_metadata_success(self, quart_test_client): """GET /api/v1/pipelines/_/metadata returns metadata list.""" response = await quart_test_client.get( - '/api/v1/pipelines/_/metadata', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines/_/metadata', headers={'Authorization': 'Bearer test_token'} ) assert response.status_code == 200 @@ -162,10 +177,7 @@ class TestPipelinesListEndpoint: @pytest.mark.asyncio async def 
test_get_pipelines_success(self, quart_test_client): """GET /api/v1/pipelines returns pipeline list.""" - response = await quart_test_client.get( - '/api/v1/pipelines', - headers={'Authorization': 'Bearer test_token'} - ) + response = await quart_test_client.get('/api/v1/pipelines', headers={'Authorization': 'Bearer test_token'}) assert response.status_code == 200 data = await response.get_json() @@ -176,8 +188,7 @@ async def test_get_pipelines_success(self, quart_test_client): async def test_get_pipelines_with_sort_param(self, quart_test_client): """GET pipelines with sort parameter.""" response = await quart_test_client.get( - '/api/v1/pipelines?sort_by=created_at&sort_order=DESC', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines?sort_by=created_at&sort_order=DESC', headers={'Authorization': 'Bearer test_token'} ) assert response.status_code == 200 @@ -193,8 +204,7 @@ class TestPipelinesCRUDEndpoints: async def test_get_single_pipeline_success(self, quart_test_client): """GET /api/v1/pipelines/{uuid} returns pipeline.""" response = await quart_test_client.get( - '/api/v1/pipelines/test-pipeline-uuid', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines/test-pipeline-uuid', headers={'Authorization': 'Bearer test_token'} ) assert response.status_code == 200 @@ -208,7 +218,7 @@ async def test_create_pipeline_success(self, quart_test_client): response = await quart_test_client.post( '/api/v1/pipelines', headers={'Authorization': 'Bearer test_token'}, - json={'name': 'New Pipeline', 'config': {}} + json={'name': 'New Pipeline', 'config': {}}, ) assert response.status_code == 200 @@ -222,7 +232,7 @@ async def test_update_pipeline_success(self, quart_test_client): response = await quart_test_client.put( '/api/v1/pipelines/test-pipeline-uuid', headers={'Authorization': 'Bearer test_token'}, - json={'name': 'Updated Pipeline'} + json={'name': 'Updated Pipeline'}, ) assert response.status_code == 200 @@ -233,8 +243,7 @@ async def 
test_update_pipeline_success(self, quart_test_client): async def test_delete_pipeline_success(self, quart_test_client): """DELETE /api/v1/pipelines/{uuid} deletes pipeline.""" response = await quart_test_client.delete( - '/api/v1/pipelines/test-pipeline-uuid', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines/test-pipeline-uuid', headers={'Authorization': 'Bearer test_token'} ) assert response.status_code == 200 @@ -245,8 +254,7 @@ async def test_delete_pipeline_success(self, quart_test_client): async def test_copy_pipeline_success(self, quart_test_client): """POST /api/v1/pipelines/{uuid}/copy copies pipeline.""" response = await quart_test_client.post( - '/api/v1/pipelines/test-pipeline-uuid/copy', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines/test-pipeline-uuid/copy', headers={'Authorization': 'Bearer test_token'} ) assert response.status_code == 200 @@ -263,8 +271,7 @@ class TestPipelineExtensionsEndpoint: async def test_get_extensions(self, quart_test_client): """GET /api/v1/pipelines/{uuid}/extensions.""" response = await quart_test_client.get( - '/api/v1/pipelines/test-pipeline-uuid/extensions', - headers={'Authorization': 'Bearer test_token'} + '/api/v1/pipelines/test-pipeline-uuid/extensions', headers={'Authorization': 'Bearer test_token'} ) # Should return 200 if pipeline found diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration_tests/box/__init__.py b/tests/integration_tests/box/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration_tests/box/test_box_integration.py b/tests/integration_tests/box/test_box_integration.py new file mode 100644 index 000000000..c20a1d87f --- /dev/null +++ b/tests/integration_tests/box/test_box_integration.py @@ -0,0 +1,329 @@ +"""Integration tests for LangBot Box. 
+ +These tests verify the end-to-end behavior of the Box sandbox execution +system. Tests decorated with ``requires_container`` need a real container +runtime (Podman or Docker) and are skipped otherwise. + +CI only runs ``tests/unit_tests/``, so these tests never execute in the +CI pipeline. Run them locally with:: + + pytest tests/integration_tests/ -v +""" + +from __future__ import annotations + +import asyncio +import logging +import shutil +import socket +import subprocess +from types import SimpleNamespace + +import pytest + +from langbot.pkg.box.service import BoxService +from langbot_plugin.box.backend import BaseSandboxBackend +from langbot_plugin.box.client import ActionRPCBoxClient +from langbot_plugin.box.errors import BoxBackendUnavailableError +from langbot_plugin.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec +from langbot_plugin.box.runtime import BoxRuntime +from langbot_plugin.box.server import BoxServerHandler + +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + +_logger = logging.getLogger('test.box.integration') + +# Default image for integration tests — small and fast to pull. 
+_TEST_IMAGE = 'alpine:latest' + + +# ── Skip helpers ────────────────────────────────────────────────────── + + +def _has_container_runtime() -> bool: + for cmd in ('podman', 'docker'): + if shutil.which(cmd) is None: + continue + try: + result = subprocess.run( + [cmd, 'info'], + capture_output=True, + timeout=10, + ) + if result.returncode == 0: + return True + except Exception: + continue + return False + + +def _can_open_test_socket() -> bool: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + except OSError: + return False + sock.close() + return True + + +requires_container = pytest.mark.skipif( + not _has_container_runtime(), + reason='no container runtime (podman/docker) available', +) + +requires_socket = pytest.mark.skipif( + not _can_open_test_socket(), + reason='local test environment does not permit opening TCP sockets', +) + + +# ── Helpers ────────────────────────────────────────────────────────── + + +class _QueueConnection: + """In-process Connection backed by asyncio Queues — no real IO.""" + + def __init__(self, rx: asyncio.Queue[str], tx: asyncio.Queue[str]): + self._rx = rx + self._tx = tx + + async def send(self, message: str) -> None: + await self._tx.put(message) + + async def receive(self) -> str: + return await self._rx.get() + + async def close(self) -> None: + pass + + +async def _make_rpc_pair(runtime: BoxRuntime): + """Create an in-process (ActionRPCBoxClient, server_task, client_task) connected via queues.""" + from langbot_plugin.runtime.io.handler import Handler + + c2s: asyncio.Queue[str] = asyncio.Queue() + s2c: asyncio.Queue[str] = asyncio.Queue() + client_conn = _QueueConnection(rx=s2c, tx=c2s) + server_conn = _QueueConnection(rx=c2s, tx=s2c) + + server_handler = BoxServerHandler(server_conn, runtime) + server_task = asyncio.create_task(server_handler.run()) + + client_handler = Handler.__new__(Handler) + Handler.__init__(client_handler, client_conn) + client_task = asyncio.create_task(client_handler.run()) + + 
client = ActionRPCBoxClient(logger=_logger) + client.set_handler(client_handler) + + return client, server_task, client_task + + +# ── Fixtures ────────────────────────────────────────────────────────── + + +@pytest.fixture +async def box_client(): + """Yield an ActionRPCBoxClient backed by a real BoxRuntime via in-process RPC.""" + runtime = BoxRuntime(logger=_logger) + await runtime.initialize() + client, server_task, client_task = await _make_rpc_pair(runtime) + yield client + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +# ── 1. Simple command execution ─────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_exec_simple_command(box_client: ActionRPCBoxClient): + """Box starts a simple command and returns stdout.""" + spec = BoxSpec( + cmd='echo hello-box', + session_id='int-simple', + workdir='/tmp', + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert 'hello-box' in result.stdout + + +# ── 2. Session file persistence ─────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_session_persists_files(box_client: ActionRPCBoxClient): + """Write a file in one exec, read it back in a second exec on the same session.""" + sid = 'int-persist' + + write_result = await box_client.execute( + BoxSpec( + cmd='echo "hello from file" > /tmp/testfile.txt', + session_id=sid, + workdir='/tmp', + image=_TEST_IMAGE, + ) + ) + assert write_result.exit_code == 0 + + read_result = await box_client.execute( + BoxSpec( + cmd='cat /tmp/testfile.txt', + session_id=sid, + workdir='/tmp', + image=_TEST_IMAGE, + ) + ) + assert read_result.exit_code == 0 + assert 'hello from file' in read_result.stdout + + +# ── 3. 
Timeout handling ─────────────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_timeout_kills_command(box_client: ActionRPCBoxClient): + """A long-running command is killed after timeout_sec.""" + session_id = 'int-timeout' + spec = BoxSpec( + cmd='sleep 120', + session_id=session_id, + workdir='/tmp', + timeout_sec=3, + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + + sessions = await box_client.get_sessions() + assert all(session['session_id'] != session_id for session in sessions) + + +# ── 4. Network isolation ───────────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_offline_cannot_reach_network(box_client: ActionRPCBoxClient): + """With network=OFF the sandbox cannot reach the internet.""" + spec = BoxSpec( + cmd='wget -q -O /dev/null --timeout=3 http://1.1.1.1 2>&1; exit $?', + session_id='int-offline', + workdir='/tmp', + network=BoxNetworkMode.OFF, + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.exit_code != 0 + + +# ── 5. 
Backend unavailable ─────────────────────────────────────────── + + +class _UnavailableBackend(BaseSandboxBackend): + """A backend that always reports itself as unavailable.""" + + name = 'unavailable' + + def __init__(self): + super().__init__(logging.getLogger('test')) + + async def is_available(self) -> bool: + return False + + async def start_session(self, spec): + raise NotImplementedError + + async def exec(self, session, spec): + raise NotImplementedError + + async def stop_session(self, session): + pass + + +@requires_socket +@pytest.mark.asyncio +async def test_backend_unavailable_returns_error(): + """When no backend is available the full RPC path returns BoxBackendUnavailableError.""" + runtime = BoxRuntime(logger=_logger, backends=[_UnavailableBackend()]) + await runtime.initialize() + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + spec = BoxSpec( + cmd='echo hello', + session_id='int-no-backend', + workdir='/tmp', + ) + with pytest.raises(BoxBackendUnavailableError): + await client.execute(spec) + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +# ── 6. 
Full service-to-runtime path ────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_full_service_to_remote_runtime(tmp_path): + """BoxService -> ActionRPCBoxClient -> RPC -> BoxRuntime -> real backend.""" + runtime = BoxRuntime(logger=_logger) + await runtime.initialize() + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + host_dir = tmp_path / 'workspace' + host_dir.mkdir() + + mock_ap = SimpleNamespace( + logger=_logger, + instance_config=SimpleNamespace( + data={ + 'box': { + 'backend': 'local', + 'runtime': {'endpoint': ''}, + 'local': { + 'profile': 'default', + 'allowed_mount_roots': [str(tmp_path)], + 'default_workspace': str(host_dir), + }, + 'e2b': {'api_key': '', 'api_url': '', 'template': ''}, + } + } + ), + ) + + service = BoxService(mock_ap, client=client) + await service.initialize() + + query = pipeline_query.Query.model_construct(query_id=42) + result = await service.execute_tool( + {'command': 'echo service-path'}, + query, + ) + + assert result['ok'] is True + assert result['status'] == 'completed' + assert 'service-path' in result['stdout'] + assert result['session_id'] == 'query_42' + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() diff --git a/tests/integration_tests/box/test_box_mcp_integration.py b/tests/integration_tests/box/test_box_mcp_integration.py new file mode 100644 index 000000000..2fcfcb934 --- /dev/null +++ b/tests/integration_tests/box/test_box_mcp_integration.py @@ -0,0 +1,368 @@ +"""Integration tests for Box MCP-related features. + +These tests verify managed process lifecycle, WebSocket stdio attach, +session cleanup, and the single-session query API using a real container +runtime. + +CI only runs ``tests/unit_tests/``, so these tests never execute in the +CI pipeline. 
Run them locally with:: + + pytest tests/integration_tests/box/test_box_mcp_integration.py -v +""" + +from __future__ import annotations + +import asyncio +import logging +import shutil +import socket +import subprocess + +import aiohttp +import pytest +from aiohttp.test_utils import TestServer + +from langbot_plugin.box.client import ActionRPCBoxClient +from langbot_plugin.box.errors import BoxManagedProcessNotFoundError, BoxSessionNotFoundError +from langbot_plugin.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSpec +from langbot_plugin.box.runtime import BoxRuntime +from langbot_plugin.box.server import BoxServerHandler, create_ws_relay_app + +_logger = logging.getLogger('test.box.mcp_integration') + +_TEST_IMAGE = 'alpine:latest' + + +# ── Skip helpers ────────────────────────────────────────────────────── + + +def _has_container_runtime() -> bool: + for cmd in ('podman', 'docker'): + if shutil.which(cmd) is None: + continue + try: + result = subprocess.run([cmd, 'info'], capture_output=True, timeout=10) + if result.returncode == 0: + return True + except Exception: + continue + return False + + +def _can_open_test_socket() -> bool: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + except OSError: + return False + sock.close() + return True + + +requires_container = pytest.mark.skipif( + not _has_container_runtime(), + reason='no container runtime (podman/docker) available', +) + +requires_socket = pytest.mark.skipif( + not _can_open_test_socket(), + reason='local test environment does not permit opening TCP sockets', +) + + +# ── Helpers ────────────────────────────────────────────────────────── + + +class _QueueConnection: + """In-process Connection backed by asyncio Queues — no real IO.""" + + def __init__(self, rx: asyncio.Queue[str], tx: asyncio.Queue[str]): + self._rx = rx + self._tx = tx + + async def send(self, message: str) -> None: + await self._tx.put(message) + + async def receive(self) -> str: + return await 
self._rx.get() + + async def close(self) -> None: + pass + + +async def _make_rpc_pair(runtime: BoxRuntime): + """Create an in-process RPC pair connected via queues.""" + from langbot_plugin.runtime.io.handler import Handler + + c2s: asyncio.Queue[str] = asyncio.Queue() + s2c: asyncio.Queue[str] = asyncio.Queue() + client_conn = _QueueConnection(rx=s2c, tx=c2s) + server_conn = _QueueConnection(rx=c2s, tx=s2c) + + server_handler = BoxServerHandler(server_conn, runtime) + server_task = asyncio.create_task(server_handler.run()) + + client_handler = Handler.__new__(Handler) + Handler.__init__(client_handler, client_conn) + client_task = asyncio.create_task(client_handler.run()) + + client = ActionRPCBoxClient(logger=_logger) + client.set_handler(client_handler) + + return client, server_task, client_task + + +# ── Fixtures ────────────────────────────────────────────────────────── + + +@pytest.fixture +async def box_server(): + """Yield a (ws_relay_url, ActionRPCBoxClient) backed by a real BoxRuntime.""" + runtime = BoxRuntime(logger=_logger) + await runtime.initialize() + + # Start ws relay for managed process attach + ws_app = create_ws_relay_app(runtime) + ws_server = TestServer(ws_app) + await ws_server.start_server() + + client, server_task, client_task = await _make_rpc_pair(runtime) + + ws_relay_url = str(ws_server.make_url('')) + yield ws_relay_url, client + + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + await ws_server.close() + + +# ── 1. 
Managed process lifecycle ───────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_managed_process_start_and_query(box_server): + """Start a managed process and query its status.""" + ws_relay_url, client = box_server + + # Create session + spec = BoxSpec( + cmd='', + session_id='mcp-int-lifecycle', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Start a managed process that stays alive + proc_spec = BoxManagedProcessSpec( + command='sh', + args=['-c', 'while true; do sleep 1; done'], + cwd='/tmp', + ) + info = await client.start_managed_process('mcp-int-lifecycle', proc_spec) + assert info.status == BoxManagedProcessStatus.RUNNING + + # Query it + info2 = await client.get_managed_process('mcp-int-lifecycle') + assert info2.status == BoxManagedProcessStatus.RUNNING + assert info2.command == 'sh' + + # Stop only the managed process while keeping the session available + await client.stop_managed_process('mcp-int-lifecycle') + with pytest.raises(BoxManagedProcessNotFoundError): + await client.get_managed_process('mcp-int-lifecycle') + session_info = await client.get_session('mcp-int-lifecycle') + assert session_info['session_id'] == 'mcp-int-lifecycle' + + # Cleanup + await client.delete_session('mcp-int-lifecycle') + + +# ── 2. 
WebSocket stdio attach ──────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_ws_stdio_attach_echo(box_server): + """Attach to a managed process via WebSocket and verify bidirectional IO.""" + ws_relay_url, client = box_server + + spec = BoxSpec( + cmd='', + session_id='mcp-int-ws', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Start a cat process (echoes stdin to stdout) + proc_spec = BoxManagedProcessSpec( + command='cat', + args=[], + cwd='/tmp', + ) + await client.start_managed_process('mcp-int-ws', proc_spec) + + # Connect via WebSocket (ws relay) + ws_url = client.get_managed_process_websocket_url('mcp-int-ws', ws_relay_url) + session = aiohttp.ClientSession() + try: + async with session.ws_connect(ws_url) as ws: + # Send a line + await ws.send_str('hello from test') + + # Expect to receive it back (cat echoes) + msg = await asyncio.wait_for(ws.receive(), timeout=5) + assert msg.type == aiohttp.WSMsgType.TEXT + assert 'hello from test' in msg.data + finally: + await session.close() + + await client.delete_session('mcp-int-ws') + + +# ── 3. Session cleanup removes container ───────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_delete_session_cleans_up(box_server): + """After deleting a session, it should no longer exist.""" + ws_relay_url, client = box_server + + spec = BoxSpec( + cmd='', + session_id='mcp-int-cleanup', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Start a process + proc_spec = BoxManagedProcessSpec( + command='sleep', + args=['3600'], + cwd='/tmp', + ) + await client.start_managed_process('mcp-int-cleanup', proc_spec) + + # Delete + await client.delete_session('mcp-int-cleanup') + + # Session should be gone + with pytest.raises(BoxSessionNotFoundError): + await client.get_session('mcp-int-cleanup') + + +# ── 4. 
GET session details ──────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_get_session_returns_details(box_server): + """Get single session returns session details and managed process info.""" + ws_relay_url, client = box_server + + spec = BoxSpec( + cmd='', + session_id='mcp-int-get', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Query without managed process + info = await client.get_session('mcp-int-get') + assert info['session_id'] == 'mcp-int-get' + assert info['image'] == _TEST_IMAGE + assert 'managed_process' not in info + + # Start a process and query again + proc_spec = BoxManagedProcessSpec( + command='sleep', + args=['3600'], + cwd='/tmp', + ) + await client.start_managed_process('mcp-int-get', proc_spec) + + info2 = await client.get_session('mcp-int-get') + assert info2['session_id'] == 'mcp-int-get' + assert 'managed_process' in info2 + assert info2['managed_process']['status'] == BoxManagedProcessStatus.RUNNING.value + + await client.delete_session('mcp-int-get') + + +# ── 5. 
Process exit detected ──────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_process_exit_detected(box_server): + """When a managed process exits, its status should reflect EXITED.""" + ws_relay_url, client = box_server + + spec = BoxSpec( + cmd='', + session_id='mcp-int-exit', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Start a process that exits immediately + proc_spec = BoxManagedProcessSpec( + command='sh', + args=['-c', 'echo done && exit 0'], + cwd='/tmp', + ) + await client.start_managed_process('mcp-int-exit', proc_spec) + + # Wait a bit for process to exit + await asyncio.sleep(2) + + info = await client.get_managed_process('mcp-int-exit') + assert info.status == BoxManagedProcessStatus.EXITED + assert info.exit_code == 0 + + await client.delete_session('mcp-int-exit') + + +# ── 6. Instance ID orphan cleanup ─────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_orphan_cleanup_preserves_own_containers(box_server): + """Orphan cleanup should not remove containers belonging to the current instance.""" + ws_relay_url, client = box_server + + # Create a session (container gets current instance ID label) + spec = BoxSpec( + cmd='', + session_id='mcp-int-orphan', + workdir='/tmp', + image=_TEST_IMAGE, + ) + await client.create_session(spec) + + # Verify session exists + sessions = await client.get_sessions() + assert any(s['session_id'] == 'mcp-int-orphan' for s in sessions) + + # Trigger status check (which doesn't clean up own containers) + status = await client.get_status() + assert status['active_sessions'] >= 1 + + # Our session should still exist + sessions = await client.get_sessions() + assert any(s['session_id'] == 'mcp-int-orphan' for s in sessions) + + await client.delete_session('mcp-int-orphan') diff --git a/tests/unit_tests/box/test_box_connector.py 
b/tests/unit_tests/box/test_box_connector.py new file mode 100644 index 000000000..ddd4899b0 --- /dev/null +++ b/tests/unit_tests/box/test_box_connector.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest + +from langbot_plugin.box.client import ActionRPCBoxClient +from langbot.pkg.box.connector import BoxRuntimeConnector + + +def make_app(logger: Mock, runtime_endpoint: str = ''): + return SimpleNamespace( + logger=logger, + instance_config=SimpleNamespace( + data={ + 'box': { + 'backend': 'local', + 'runtime': {'endpoint': runtime_endpoint}, + 'local': { + 'profile': 'default', + 'allowed_mount_roots': [], + 'default_workspace': '', + }, + 'e2b': {'api_key': '', 'api_url': '', 'template': ''}, + } + } + ), + ) + + +def test_box_runtime_connector_stdio_when_no_url(monkeypatch: pytest.MonkeyPatch): + """Without runtime.endpoint, on a non-Docker Unix platform, use stdio.""" + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + connector = BoxRuntimeConnector(make_app(Mock())) + + assert connector._uses_websocket() is False + assert isinstance(connector.client, ActionRPCBoxClient) + + +def test_box_runtime_connector_ws_when_url_configured(monkeypatch: pytest.MonkeyPatch): + """With an explicit runtime.endpoint, always use WebSocket.""" + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + logger = Mock() + connector = BoxRuntimeConnector(make_app(logger, runtime_endpoint='http://box-runtime:5410')) + + assert connector._uses_websocket() is True + assert isinstance(connector.client, ActionRPCBoxClient) + + +def test_box_runtime_connector_ws_in_docker(monkeypatch: pytest.MonkeyPatch): + """Inside Docker (no explicit URL), use WebSocket to reach a sibling container.""" + 
monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'docker') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + connector = BoxRuntimeConnector(make_app(Mock())) + + assert connector._uses_websocket() is True + assert connector.ws_relay_base_url == 'http://langbot_box:5410' + + +def test_box_runtime_connector_ws_with_standalone_flag(monkeypatch: pytest.MonkeyPatch): + """With --standalone-box flag, use WebSocket even on a local Unix platform.""" + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', True) + connector = BoxRuntimeConnector(make_app(Mock())) + + assert connector._uses_websocket() is True + + +def test_box_runtime_connector_ws_relay_url_default(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + connector = BoxRuntimeConnector(make_app(Mock())) + + assert connector.ws_relay_base_url == 'http://127.0.0.1:5410' + + +def test_box_runtime_connector_ws_relay_url_explicit(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + connector = BoxRuntimeConnector(make_app(Mock(), runtime_endpoint='http://box-runtime:5410')) + assert connector.ws_relay_base_url == 'http://box-runtime:5410' + + +def test_box_runtime_connector_dispose_terminates_subprocess(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + logger = Mock() + connector = BoxRuntimeConnector(make_app(logger)) + subprocess = Mock() + subprocess.returncode = None + handler_task = Mock() + ctrl_task = Mock() + connector._subprocess = subprocess + 
connector._handler_task = handler_task + connector._ctrl_task = ctrl_task + + connector.dispose() + + subprocess.terminate.assert_called_once() + handler_task.cancel.assert_called_once() + ctrl_task.cancel.assert_called_once() + assert connector._handler_task is None + assert connector._ctrl_task is None diff --git a/tests/unit_tests/box/test_box_service.py b/tests/unit_tests/box/test_box_service.py new file mode 100644 index 000000000..44f42ec17 --- /dev/null +++ b/tests/unit_tests/box/test_box_service.py @@ -0,0 +1,1392 @@ +from __future__ import annotations + +import asyncio +import datetime as dt +import os +import tempfile +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + +from langbot_plugin.box.backend import BaseSandboxBackend +from langbot_plugin.box.client import BoxRuntimeClient, ActionRPCBoxClient +from langbot_plugin.box.errors import ( + BoxBackendUnavailableError, + BoxSessionConflictError, + BoxSessionNotFoundError, + BoxValidationError, +) +from langbot_plugin.box.models import ( + BUILTIN_PROFILES, + BoxExecutionResult, + BoxExecutionStatus, + BoxHostMountMode, + BoxManagedProcessSpec, + BoxNetworkMode, + BoxSessionInfo, + BoxSpec, +) +from langbot_plugin.box.runtime import BoxRuntime +from langbot.pkg.box.service import BoxService + +_UTC = dt.timezone.utc + + +class _InProcessBoxRuntimeClient(BoxRuntimeClient): + """Test-only client that wraps a BoxRuntime in-process (no HTTP).""" + + def __init__(self, logger, runtime=None): + self._runtime = runtime or BoxRuntime(logger=logger) + + async def initialize(self): + await self._runtime.initialize() + + async def execute(self, spec): + return await self._runtime.execute(spec) + + async def shutdown(self): + await self._runtime.shutdown() + + async def get_status(self): + return await self._runtime.get_status() + + async def get_sessions(self): + return 
self._runtime.get_sessions() + + async def get_backend_info(self): + return await self._runtime.get_backend_info() + + async def delete_session(self, session_id): + await self._runtime.delete_session(session_id) + + async def create_session(self, spec): + return await self._runtime.create_session(spec) + + async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec): + return await self._runtime.start_managed_process(session_id, spec) + + async def get_managed_process(self, session_id: str, process_id: str = 'default'): + return self._runtime.get_managed_process(session_id, process_id) + + async def stop_managed_process(self, session_id: str, process_id: str = 'default'): + await self._runtime.stop_managed_process(session_id, process_id) + + async def get_session(self, session_id: str): + return self._runtime.get_session(session_id) + + async def init(self, config: dict) -> None: + self._runtime.init(config) + + +class FakeBackend(BaseSandboxBackend): + def __init__(self, logger: Mock, available: bool = True): + super().__init__(logger) + self.name = 'fake' + self.available = available + self.start_calls: list[str] = [] + self.start_specs: list[BoxSpec] = [] + self.exec_calls: list[tuple[str, str]] = [] + self.stop_calls: list[str] = [] + + async def is_available(self) -> bool: + return self.available + + async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: + self.start_calls.append(spec.session_id) + self.start_specs.append(spec) + now = dt.datetime.now(_UTC) + return BoxSessionInfo( + session_id=spec.session_id, + backend_name=self.name, + backend_session_id=f'backend-{spec.session_id}', + image=spec.image, + network=spec.network, + host_path=spec.host_path, + host_path_mode=spec.host_path_mode, + mount_path=spec.mount_path, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, + created_at=now, + last_used_at=now, + ) + + async def exec(self, session: 
BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + self.exec_calls.append((session.session_id, spec.cmd)) + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=0, + stdout=f'executed: {spec.cmd}', + stderr='', + duration_ms=12, + ) + + async def stop_session(self, session: BoxSessionInfo): + self.stop_calls.append(session.session_id) + + +def make_query(query_id: int = 42) -> pipeline_query.Query: + return pipeline_query.Query.model_construct( + query_id=query_id, + launcher_type='person', + launcher_id='test_user', + sender_id='test_user', + variables={ + 'launcher_type': 'person', + 'launcher_id': 'test_user', + 'sender_id': 'test_user', + 'query_id': str(query_id), + }, + ) + + +def make_app( + logger: Mock, + allowed_mount_roots: list[str] | None = None, + profile: str = 'default', + host_root: str = '', + workspace_quota_mb: int | None = None, + enabled: bool = True, +): + box_config = { + 'enabled': enabled, + 'backend': 'local', + 'runtime': {'endpoint': ''}, + 'local': { + 'profile': profile, + 'host_root': host_root, + 'allowed_mount_roots': allowed_mount_roots or [], + 'default_workspace': '', + }, + 'e2b': {'api_key': '', 'api_url': '', 'template': ''}, + } + if workspace_quota_mb is not None: + box_config['local']['workspace_quota_mb'] = workspace_quota_mb + + return SimpleNamespace( + logger=logger, + instance_config=SimpleNamespace(data={'box': box_config}), + ) + + +@pytest.mark.asyncio +async def test_box_service_without_explicit_client_initializes_internal_connector(monkeypatch: pytest.MonkeyPatch): + connector = Mock() + connector.client = Mock() + connector.initialize = AsyncMock() + + monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', Mock(return_value=connector)) + + service = BoxService(make_app(Mock())) + await service.initialize() + + assert service.client is connector.client + connector.initialize.assert_awaited_once() + + 
+@pytest.mark.asyncio +async def test_box_service_get_sessions_delegates_to_client(): + client = Mock() + client.get_sessions = AsyncMock(return_value=[{'session_id': 'test-session'}]) + + service = BoxService(make_app(Mock()), client=client) + service._available = True + + sessions = await service.get_sessions() + + assert sessions == [{'session_id': 'test-session'}] + client.get_sessions.assert_awaited_once() + + +def test_box_service_dispose_delegates_to_internal_connector(monkeypatch: pytest.MonkeyPatch): + connector = Mock() + connector.client = Mock() + + monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', Mock(return_value=connector)) + + service = BoxService(make_app(Mock())) + service.dispose() + + connector.dispose.assert_called_once() + + +@pytest.mark.asyncio +async def test_box_service_dispose_schedules_shutdown_on_event_loop(monkeypatch: pytest.MonkeyPatch): + connector = Mock() + connector.client = Mock() + connector.dispose = Mock() + + monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', Mock(return_value=connector)) + + app = make_app(Mock()) + loop = asyncio.get_running_loop() + app.event_loop = loop + + service = BoxService(app) + service.shutdown = AsyncMock() + + service.dispose() + await asyncio.sleep(0) + + connector.dispose.assert_called_once() + service.shutdown.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_box_runtime_reuses_request_session(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-1'}) + second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-1'}) + + await runtime.execute(first) + await runtime.execute(second) + + assert backend.start_calls == ['req-1'] + assert backend.exec_calls == [('req-1', 'echo first'), ('req-1', 'echo second')] + + +@pytest.mark.asyncio +async def 
test_box_service_defaults_session_id_from_query(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_tool({'command': 'pwd'}, make_query(7)) + + assert result['session_id'] == 'person_test_user' + assert result['ok'] is True + assert backend.start_calls == ['person_test_user'] + + +@pytest.mark.asyncio +async def test_box_service_session_id_uses_query_attributes_without_variables(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + query = pipeline_query.Query.model_construct(query_id=7, launcher_type='group', launcher_id='room-1') + result = await service.execute_tool({'command': 'pwd'}, query) + + assert result['session_id'] == 'group_room-1' + assert result['ok'] is True + assert backend.start_calls == ['group_room-1'] + + +@pytest.mark.asyncio +async def test_box_service_session_id_falls_back_to_query_id_for_synthetic_queries(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + query = pipeline_query.Query.model_construct(query_id=7) + result = await service.execute_tool({'command': 'pwd'}, query) + + assert result['session_id'] == 'query_7' + assert result['ok'] is True + assert backend.start_calls == ['query_7'] + + +@pytest.mark.asyncio +async def test_box_service_fails_closed_when_backend_unavailable(): + logger = Mock() + backend = FakeBackend(logger, available=False) + runtime = BoxRuntime(logger=logger, 
backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + with pytest.raises(BoxBackendUnavailableError): + await service.execute_tool({'command': 'echo hello'}, make_query(9)) + + +@pytest.mark.asyncio +async def test_box_service_allows_host_mount_under_configured_root(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + host_dir = tmp_path / 'mounted-workspace' + host_dir.mkdir() + service = BoxService(make_app(logger, [str(tmp_path)]), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_spec_payload( + { + 'cmd': 'pwd', + 'host_path': str(host_dir), + 'host_path_mode': BoxHostMountMode.READ_WRITE.value, + 'session_id': '11', + }, + make_query(11), + ) + + assert result['ok'] is True + assert backend.start_calls == ['11'] + + +@pytest.mark.asyncio +async def test_box_service_uses_default_workspace_when_host_path_omitted(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + host_dir = tmp_path / 'default-workspace' + host_dir.mkdir() + app = make_app(logger, [str(tmp_path)]) + app.instance_config.data['box']['local']['default_workspace'] = str(host_dir) + service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_tool({'command': 'pwd'}, make_query(15)) + + assert result['ok'] is True + assert backend.start_calls == ['person_test_user'] + assert backend.exec_calls == [('person_test_user', 'pwd')] + assert backend.start_specs[0].host_path == os.path.realpath(host_dir) + + +@pytest.mark.asyncio +async def test_box_service_creates_default_workspace_on_initialize(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = 
BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + allowed_root = tmp_path / 'allowed-root' + allowed_root.mkdir() + default_workspace = allowed_root / 'default-workspace' + app = make_app(logger, [str(allowed_root)]) + app.instance_config.data['box']['local']['default_workspace'] = str(default_workspace) + service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime)) + + await service.initialize() + + assert default_workspace.is_dir() + + +@pytest.mark.asyncio +async def test_box_service_derives_workspace_and_allowed_root_from_host_root(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + shared_root = tmp_path / 'shared-box-root' + app = make_app(logger, host_root=str(shared_root)) + service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime)) + + await service.initialize() + + assert service.host_root == os.path.realpath(shared_root) + assert service.default_workspace == os.path.realpath(shared_root / 'default') + assert service.allowed_mount_roots == [os.path.realpath(shared_root)] + assert (shared_root / 'default').is_dir() + + +@pytest.mark.asyncio +async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + allowed_root = tmp_path / 'allowed' + disallowed_root = tmp_path / 'disallowed' + allowed_root.mkdir() + disallowed_root.mkdir() + service = BoxService(make_app(logger, [str(allowed_root)]), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + with pytest.raises(BoxValidationError): + await service.execute_spec_payload( + { + 'cmd': 'pwd', + 'host_path': str(disallowed_root), + 'session_id': '12', + }, + make_query(12), + ) + + +@pytest.mark.asyncio +async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path): + logger 
= Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + first_host_dir = tmp_path / 'first' + second_host_dir = tmp_path / 'second' + first_host_dir.mkdir() + second_host_dir.mkdir() + + first = BoxSpec.model_validate( + { + 'cmd': 'echo first', + 'session_id': 'req-mount', + 'host_path': os.path.realpath(first_host_dir), + } + ) + second = BoxSpec.model_validate( + { + 'cmd': 'echo second', + 'session_id': 'req-mount', + 'host_path': os.path.realpath(second_host_dir), + } + ) + + await runtime.execute(first) + + with pytest.raises(BoxSessionConflictError): + await runtime.execute(second) + + +@pytest.mark.asyncio +async def test_box_runtime_rejects_resource_limit_conflict_in_same_session(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-resource', 'cpus': 1.0}) + second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-resource', 'cpus': 2.0}) + + await runtime.execute(first) + + with pytest.raises(BoxSessionConflictError): + await runtime.execute(second) + + +# ── Truncation tests ────────────────────────────────────────────────── + + +class FakeBackendWithOutput(FakeBackend): + """FakeBackend that returns configurable stdout/stderr.""" + + def __init__(self, logger: Mock, stdout: str = '', stderr: str = ''): + super().__init__(logger) + self._stdout = stdout + self._stderr = stderr + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + self.exec_calls.append((session.session_id, spec.cmd)) + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=0, + stdout=self._stdout, + stderr=self._stderr, + duration_ms=5, + ) + + +class 
FakeBackendWritingFiles(FakeBackend): + """Fake backend that writes files into the mounted host workspace during exec.""" + + def __init__(self, logger: Mock, files_to_write: list[tuple[str, int]]): + super().__init__(logger) + self._files_to_write = files_to_write + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + self.exec_calls.append((session.session_id, spec.cmd)) + if session.host_path: + for relative_path, size in self._files_to_write: + host_path = os.path.join(session.host_path, relative_path) + os.makedirs(os.path.dirname(host_path), exist_ok=True) + with open(host_path, 'wb') as f: + f.write(b'x' * size) + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=0, + stdout='wrote files', + stderr='', + duration_ms=5, + ) + + +@pytest.mark.asyncio +async def test_truncate_short_output_unchanged(): + logger = Mock() + backend = FakeBackendWithOutput(logger, stdout='hello world') + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=100) + await service.initialize() + + result = await service.execute_tool({'command': 'echo hello'}, make_query(20)) + + assert result['stdout'] == 'hello world' + assert result['stdout_truncated'] is False + + +@pytest.mark.asyncio +async def test_truncate_preserves_head_and_tail(): + logger = Mock() + # Build output: "AAAA...BBB..." 
where each section is identifiable + head_marker = 'HEAD_START|' + tail_marker = '|TAIL_END' + filler = 'x' * 500 + big_output = f'{head_marker}{filler}{tail_marker}' + + backend = FakeBackendWithOutput(logger, stdout=big_output) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + limit = 100 + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=limit) + await service.initialize() + + result = await service.execute_tool({'command': 'cat big'}, make_query(21)) + + assert result['stdout_truncated'] is True + stdout = result['stdout'] + # Head part should contain the head marker + assert stdout.startswith(head_marker) + # Tail part should contain the tail marker + assert stdout.endswith(tail_marker) + # Should contain the truncation notice + assert 'characters truncated' in stdout + assert len(stdout) <= limit + + +@pytest.mark.asyncio +async def test_truncate_at_exact_limit_not_truncated(): + logger = Mock() + exact_output = 'a' * 200 + backend = FakeBackendWithOutput(logger, stdout=exact_output) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=200) + await service.initialize() + + result = await service.execute_tool({'command': 'echo a'}, make_query(22)) + + assert result['stdout'] == exact_output + assert result['stdout_truncated'] is False + + +@pytest.mark.asyncio +async def test_truncate_stderr_independently(): + logger = Mock() + backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=100) + await service.initialize() + + result = await service.execute_tool({'command': 'fail'}, make_query(23)) + + assert result['stdout_truncated'] 
is False + assert result['stderr_truncated'] is True + assert 'characters truncated' in result['stderr'] + assert len(result['stderr']) <= 100 + + +# ── Profile tests ───────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_profile_default_provides_defaults(): + """When tool call omits network/image, profile defaults are used.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_tool({'command': 'echo hi'}, make_query(30)) + + assert result['ok'] is True + spec = backend.start_specs[0] + profile = BUILTIN_PROFILES['default'] + assert spec.network == BoxNetworkMode.OFF + assert spec.image == profile.image + assert spec.timeout_sec == profile.timeout_sec + + +@pytest.mark.asyncio +async def test_profile_unlocked_field_can_be_overridden(): + """Spec payload can override unlocked profile fields.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_spec_payload( + {'cmd': 'echo hi', 'timeout_sec': 60, 'network': 'on', 'session_id': '31'}, + make_query(31), + ) + + assert result['ok'] is True + spec = backend.start_specs[0] + assert spec.timeout_sec == 60 + assert spec.network == BoxNetworkMode.ON + + +@pytest.mark.asyncio +async def test_profile_locked_field_cannot_be_overridden(): + """offline_readonly profile locks network and host_path_mode.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService( + make_app(logger, profile='offline_readonly'), 
client=_InProcessBoxRuntimeClient(logger, runtime) + ) + await service.initialize() + + result = await service.execute_spec_payload( + {'cmd': 'echo hi', 'network': 'on', 'host_path_mode': 'rw', 'session_id': '32'}, + make_query(32), + ) + + assert result['ok'] is True + spec = backend.start_specs[0] + assert spec.network == BoxNetworkMode.OFF + assert spec.host_path_mode == BoxHostMountMode.READ_ONLY + + +@pytest.mark.asyncio +async def test_profile_timeout_clamped_to_max(): + """timeout_sec exceeding max_timeout_sec is clamped.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + result = await service.execute_tool({'command': 'echo hi', 'timeout_sec': 999}, make_query(33)) + + assert result['ok'] is True + spec = backend.start_specs[0] + # default profile max_timeout_sec = 120 + assert spec.timeout_sec == 120 + + +@pytest.mark.asyncio +@pytest.mark.parametrize('timeout_value', ['999', 999.0]) +async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + await service.execute_tool({'command': 'echo hi', 'timeout_sec': timeout_value}, make_query(34)) + + spec = backend.start_specs[0] + assert spec.timeout_sec == 120 + + +def test_unknown_profile_raises_error(): + """Config referencing a non-existent profile name raises immediately.""" + logger = Mock() + runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300) + with pytest.raises(BoxValidationError, match='unknown box profile'): + BoxService(make_app(logger, profile='nonexistent'), 
client=_InProcessBoxRuntimeClient(logger, runtime)) + + +def test_builtin_profiles_are_consistent(): + """Basic sanity check on all built-in profiles.""" + assert 'default' in BUILTIN_PROFILES + assert 'offline_readonly' in BUILTIN_PROFILES + assert 'network_basic' in BUILTIN_PROFILES + assert 'network_extended' in BUILTIN_PROFILES + + offline = BUILTIN_PROFILES['offline_readonly'] + assert offline.network == BoxNetworkMode.OFF + assert offline.host_path_mode == BoxHostMountMode.READ_ONLY + assert 'network' in offline.locked + assert 'host_path_mode' in offline.locked + assert 'read_only_rootfs' in offline.locked + assert offline.max_timeout_sec <= BUILTIN_PROFILES['default'].max_timeout_sec + + basic = BUILTIN_PROFILES['network_basic'] + assert basic.network == BoxNetworkMode.ON + assert basic.read_only_rootfs is True + + extended = BUILTIN_PROFILES['network_extended'] + assert extended.network == BoxNetworkMode.ON + assert extended.read_only_rootfs is False + assert extended.cpus > BUILTIN_PROFILES['default'].cpus + assert extended.memory_mb > BUILTIN_PROFILES['default'].memory_mb + + +@pytest.mark.asyncio +async def test_profile_default_applies_resource_limits(): + """Default profile resource limits are applied to BoxSpec.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + await service.execute_tool({'command': 'echo hi'}, make_query(40)) + + spec = backend.start_specs[0] + profile = BUILTIN_PROFILES['default'] + assert spec.cpus == profile.cpus + assert spec.memory_mb == profile.memory_mb + assert spec.pids_limit == profile.pids_limit + assert spec.read_only_rootfs == profile.read_only_rootfs + assert spec.workspace_quota_mb == profile.workspace_quota_mb + + +@pytest.mark.asyncio +async def test_box_service_applies_workspace_quota_from_config(tmp_path): + 
logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + host_dir = tmp_path / 'default-workspace' + host_dir.mkdir() + app = make_app(logger, [str(tmp_path)], workspace_quota_mb=32) + app.instance_config.data['box']['local']['default_workspace'] = str(host_dir) + service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime)) + + await service.initialize() + await service.execute_tool({'command': 'echo hi'}, make_query(43)) + + assert backend.start_specs[0].workspace_quota_mb == 32 + + +@pytest.mark.asyncio +async def test_box_service_rejects_execution_when_workspace_already_exceeds_quota(tmp_path): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + host_dir = tmp_path / 'quota-workspace' + host_dir.mkdir() + (host_dir / 'already-too-large.bin').write_bytes(b'x' * (2 * 1024 * 1024)) + app = make_app(logger, [str(tmp_path)], workspace_quota_mb=1) + app.instance_config.data['box']['local']['default_workspace'] = str(host_dir) + service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime)) + + await service.initialize() + + with pytest.raises(BoxValidationError, match='workspace quota exceeded before execution'): + await service.execute_tool({'command': 'echo hi'}, make_query(44)) + + assert backend.start_calls == [] + + +@pytest.mark.asyncio +async def test_box_service_rejects_and_cleans_up_when_execution_exceeds_workspace_quota(tmp_path): + logger = Mock() + backend = FakeBackendWritingFiles(logger, files_to_write=[('output.bin', 2 * 1024 * 1024)]) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + host_dir = tmp_path / 'quota-workspace-post' + host_dir.mkdir() + app = make_app(logger, [str(tmp_path)], workspace_quota_mb=1) + app.instance_config.data['box']['local']['default_workspace'] = str(host_dir) + service = BoxService(app, 
client=_InProcessBoxRuntimeClient(logger, runtime)) + + await service.initialize() + + with pytest.raises(BoxValidationError, match='workspace quota exceeded after execution'): + await service.execute_tool({'command': 'generate-output'}, make_query(45)) + + assert backend.start_calls == ['person_test_user'] + assert backend.stop_calls == ['person_test_user'] + + +@pytest.mark.asyncio +async def test_profile_offline_readonly_locks_read_only_rootfs(): + """offline_readonly locks read_only_rootfs so it cannot be overridden.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService( + make_app(logger, profile='offline_readonly'), client=_InProcessBoxRuntimeClient(logger, runtime) + ) + await service.initialize() + + await service.execute_spec_payload( + {'cmd': 'echo hi', 'read_only_rootfs': False, 'session_id': '41'}, make_query(41) + ) + + spec = backend.start_specs[0] + assert spec.read_only_rootfs is True + + +@pytest.mark.asyncio +async def test_profile_network_extended_has_relaxed_limits(): + """network_extended profile provides higher resource limits.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService( + make_app(logger, profile='network_extended'), client=_InProcessBoxRuntimeClient(logger, runtime) + ) + await service.initialize() + + await service.execute_tool({'command': 'echo hi'}, make_query(42)) + + spec = backend.start_specs[0] + assert spec.network == BoxNetworkMode.ON + assert spec.cpus == 2.0 + assert spec.memory_mb == 1024 + assert spec.read_only_rootfs is False + + +def test_box_spec_validates_resource_limits(): + """BoxSpec rejects invalid resource limit values.""" + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'cpus': 0}) + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 
'session_id': 's1', 'memory_mb': 10}) + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'pids_limit': 0}) + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'workspace_quota_mb': -1}) + + +# ── Observability tests ─────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_runtime_get_status_reports_backend_and_sessions(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + status = await runtime.get_status() + assert status['backend']['name'] == 'fake' + assert status['backend']['available'] is True + assert status['active_sessions'] == 0 + + await runtime.execute(BoxSpec.model_validate({'cmd': 'echo', 'session_id': 'obs-1'})) + status = await runtime.get_status() + assert status['active_sessions'] == 1 + + +@pytest.mark.asyncio +async def test_runtime_get_sessions_returns_session_info(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + await runtime.execute(BoxSpec.model_validate({'cmd': 'echo', 'session_id': 'obs-2'})) + sessions = runtime.get_sessions() + assert len(sessions) == 1 + assert sessions[0]['session_id'] == 'obs-2' + assert sessions[0]['backend_name'] == 'fake' + assert 'created_at' in sessions[0] + assert 'last_used_at' in sessions[0] + + +@pytest.mark.asyncio +async def test_runtime_get_backend_info_when_no_backend(): + logger = Mock() + backend = FakeBackend(logger, available=False) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + info = await runtime.get_backend_info() + assert info['name'] is None + assert info['available'] is False + + +@pytest.mark.asyncio +async def test_service_records_errors_on_failure(): + logger = Mock() + backend = 
FakeBackend(logger, available=False) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + with pytest.raises(Exception): + await service.execute_tool({'command': 'echo hello'}, make_query(50)) + + errors = service.get_recent_errors() + assert len(errors) == 1 + assert errors[0]['type'] == 'BoxBackendUnavailableError' + assert errors[0]['query_id'] == '50' + assert 'timestamp' in errors[0] + + +@pytest.mark.asyncio +async def test_service_error_ring_buffer_capped(): + logger = Mock() + backend = FakeBackend(logger, available=False) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + for i in range(60): + with pytest.raises(Exception): + await service.execute_tool({'command': 'fail'}, make_query(100 + i)) + + errors = service.get_recent_errors() + assert len(errors) == 50 + # Oldest should have been evicted, newest kept + assert errors[0]['query_id'] == '110' + assert errors[-1]['query_id'] == '159' + + +@pytest.mark.asyncio +async def test_service_get_status_aggregates_runtime_and_profile(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime)) + await service.initialize() + + status = await service.get_status() + assert status['profile'] == 'default' + assert status['backend']['name'] == 'fake' + assert status['backend']['available'] is True + assert status['active_sessions'] == 0 + assert status['recent_error_count'] == 0 + + +# ── In-process RPC client/server tests ───────────────────────────────── + + +class _QueueConnection: + """In-process Connection backed by asyncio Queues — no real IO.""" + + 
def __init__(self, rx: asyncio.Queue[str], tx: asyncio.Queue[str]): + self._rx = rx + self._tx = tx + + async def send(self, message: str) -> None: + await self._tx.put(message) + + async def receive(self) -> str: + return await self._rx.get() + + async def close(self) -> None: + pass + + +def _make_queue_connection_pair(): + """Return (client_conn, server_conn) linked by queues.""" + c2s: asyncio.Queue[str] = asyncio.Queue() + s2c: asyncio.Queue[str] = asyncio.Queue() + client_conn = _QueueConnection(rx=s2c, tx=c2s) + server_conn = _QueueConnection(rx=c2s, tx=s2c) + return client_conn, server_conn + + +async def _make_rpc_pair(runtime: BoxRuntime): + """Create an in-process (ActionRPCBoxClient, server_task, client_task) connected via queues.""" + from langbot_plugin.box.server import BoxServerHandler + from langbot_plugin.runtime.io.handler import Handler + + client_conn, server_conn = _make_queue_connection_pair() + + server_handler = BoxServerHandler(server_conn, runtime) + server_task = asyncio.create_task(server_handler.run()) + + client_handler = Handler.__new__(Handler) + Handler.__init__(client_handler, client_conn) + client_task = asyncio.create_task(client_handler.run()) + + client = ActionRPCBoxClient(logger=Mock()) + client.set_handler(client_handler) + + return client, server_task, client_task + + +@pytest.mark.asyncio +async def test_rpc_client_execute(): + """ActionRPCBoxClient correctly calls server and parses result.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + spec = BoxSpec.model_validate({'cmd': 'echo remote', 'session_id': 'r-1'}) + result = await client.execute(spec) + + assert result.session_id == 'r-1' + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert result.stdout == 'executed: echo remote' + finally: + 
server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_get_sessions(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + spec = BoxSpec.model_validate({'cmd': 'echo hi', 'session_id': 'r-2'}) + await client.execute(spec) + + sessions = await client.get_sessions() + assert len(sessions) == 1 + assert sessions[0]['session_id'] == 'r-2' + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_get_status(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + status = await client.get_status() + + assert 'backend' in status + assert 'active_sessions' in status + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_get_backend_info(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + info = await client.get_backend_info() + + assert info['name'] == 'fake' + assert info['available'] is True + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +# ── RPC-based delete/create/conflict tests ──────────────────────────── + + +@pytest.mark.asyncio +async def test_rpc_client_delete_session(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, 
client_task = await _make_rpc_pair(runtime) + try: + spec = BoxSpec.model_validate({'cmd': 'echo hi', 'session_id': 'r-del-1'}) + await client.execute(spec) + + await client.delete_session('r-del-1') + + sessions = await client.get_sessions() + assert len(sessions) == 0 + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_delete_session_raises_not_found(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + with pytest.raises(BoxSessionNotFoundError): + await client.delete_session('nonexistent') + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_create_session(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + spec = BoxSpec.model_validate({'cmd': 'placeholder', 'session_id': 'r-create-1'}) + info = await client.create_session(spec) + assert info['session_id'] == 'r-create-1' + assert info['backend_name'] == 'fake' + + sessions = await client.get_sessions() + assert len(sessions) == 1 + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +@pytest.mark.asyncio +async def test_rpc_client_exec_raises_conflict_error(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + client, server_task, client_task = await _make_rpc_pair(runtime) + try: + spec1 = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'r-conflict-1', 'network': 'off'}) + await client.execute(spec1) + + spec2 = 
BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'r-conflict-1', 'network': 'on'}) + with pytest.raises(BoxSessionConflictError): + await client.execute(spec2) + finally: + server_task.cancel() + client_task.cancel() + await runtime.shutdown() + + +# ── BoxHostMountMode.NONE tests ───────────────────────────────────── + + +class TestBoxHostMountModeNone: + def test_none_mode_is_valid_enum(self): + assert BoxHostMountMode.NONE.value == 'none' + + def test_spec_with_none_mode_skips_workdir_check(self): + """When host_path_mode is NONE, workdir validation is skipped.""" + spec = BoxSpec( + session_id='test', + cmd='echo hi', + host_path='/home/user/data', + host_path_mode=BoxHostMountMode.NONE, + workdir='/opt/custom', # Not under /workspace, should be allowed + ) + assert spec.host_path_mode == BoxHostMountMode.NONE + assert spec.workdir == '/opt/custom' + + def test_spec_with_rw_mode_requires_workspace_workdir(self): + """When host_path_mode is RW, workdir must be under mount_path.""" + with pytest.raises(Exception): + BoxSpec( + session_id='test', + cmd='echo hi', + host_path='/home/user/data', + host_path_mode=BoxHostMountMode.READ_WRITE, + workdir='/opt/custom', + ) + + def test_spec_with_ro_mode_requires_workspace_workdir(self): + """When host_path_mode is RO, workdir must be under mount_path.""" + with pytest.raises(Exception): + BoxSpec( + session_id='test', + cmd='echo hi', + host_path='/home/user/data', + host_path_mode=BoxHostMountMode.READ_ONLY, + workdir='/opt/custom', + ) + + def test_spec_with_custom_mount_path_allows_matching_workdir(self): + spec = BoxSpec( + session_id='test', + cmd='echo hi', + host_path='/home/user/data', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + workdir='/project/src', + ) + assert spec.mount_path == '/project' + assert spec.workdir == '/project/src' + + def test_spec_with_custom_mount_path_rejects_outside_workdir(self): + with pytest.raises(Exception): + BoxSpec( + session_id='test', + 
cmd='echo hi', + host_path='/home/user/data', + host_path_mode=BoxHostMountMode.READ_WRITE, + mount_path='/project', + workdir='/workspace', + ) + + +class TestBoxDisabledByConfig: + """``box.enabled = false`` must keep the BoxService usable as a status + surface but skip every connection attempt and report unavailable.""" + + @pytest.mark.asyncio + async def test_initialize_skips_connector_when_disabled(self): + logger = Mock() + app = make_app(logger, enabled=False) + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock() + service = BoxService(app, client=client) + + await service.initialize() + + # The client must not be touched; we did not even open a connection. + client.initialize.assert_not_awaited() + assert service.enabled is False + assert service.available is False + # The reason is captured so the dashboard / UI can show it. + assert 'disabled' in service._connector_error.lower() + + @pytest.mark.asyncio + async def test_get_status_reports_disabled(self): + logger = Mock() + service = BoxService(make_app(logger, enabled=False), client=Mock(spec=BoxRuntimeClient)) + await service.initialize() + + status = await service.get_status() + + assert status['available'] is False + assert status['enabled'] is False + assert 'disabled' in status['connector_error'].lower() + + @pytest.mark.asyncio + async def test_get_status_distinguishes_enabled_but_unavailable(self): + logger = Mock() + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock(side_effect=RuntimeError('docker daemon not running')) + service = BoxService(make_app(logger, enabled=True), client=client) + + await service.initialize() + + status = await service.get_status() + assert status['available'] is False + assert status['enabled'] is True + assert 'docker daemon' in status['connector_error'] + + @pytest.mark.asyncio + async def test_get_status_downgrades_available_when_backend_dead(self): + """The connector can be healthy while the runtime reports no usable + backend 
(operator selected nsjail but binary missing, Docker daemon + crashed after handshake, ...). The top-level ``available`` must + reflect the combined state so the dashboard / useBoxStatus hook / + skill_service gate stay consistent with the native-tool gate.""" + logger = Mock() + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock() + client.get_status = AsyncMock( + return_value={ + 'backend': {'name': 'nsjail', 'available': False}, + 'active_sessions': 0, + } + ) + service = BoxService(make_app(logger, enabled=True), client=client) + await service.initialize() + + status = await service.get_status() + assert status['available'] is False + assert status['enabled'] is True + # The detailed backend object is preserved for the dialog + assert status['backend'] == {'name': 'nsjail', 'available': False} + assert 'nsjail' in status['connector_error'] + + @pytest.mark.asyncio + async def test_get_status_keeps_available_true_when_backend_ok(self): + logger = Mock() + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock() + client.get_status = AsyncMock( + return_value={ + 'backend': {'name': 'docker', 'available': True}, + 'active_sessions': 2, + } + ) + service = BoxService(make_app(logger, enabled=True), client=client) + await service.initialize() + + status = await service.get_status() + assert status['available'] is True + assert status['backend'] == {'name': 'docker', 'available': True} + # No spurious connector_error overlay when everything is healthy + assert 'connector_error' not in status or not status['connector_error'] + + @pytest.mark.asyncio + async def test_disconnect_callback_is_no_op_when_disabled(self): + logger = Mock() + service = BoxService(make_app(logger, enabled=False), client=Mock(spec=BoxRuntimeClient)) + + # Should be safe to fire; must not flip reconnect state on a disabled + # service. If it tried to schedule a reconnect, the test would hang. 
+ await service._on_runtime_disconnect(connector=Mock()) + + assert service._reconnecting is False + + +class TestBuildSkillExtraMounts: + """Robustness of skill mount construction against a stale skill cache. + + The three sandbox backends behave inconsistently when a skill's + package_root no longer exists on disk (nsjail aborts the whole sandbox + start, Docker silently auto-creates a root-owned empty directory, E2B + silently skips). Mount construction must filter these out up front so + the backend never sees a bad mount. + """ + + def _make_service(self, logger, skills): + app = make_app(logger) + app.skill_mgr = SimpleNamespace(skills=skills) + client = Mock(spec=BoxRuntimeClient) + return BoxService(app, client=client) + + def test_skips_skill_with_missing_package_root(self): + logger = Mock() + with tempfile.TemporaryDirectory() as live_dir: + skills = { + 'alive': {'name': 'alive', 'package_root': live_dir}, + 'ghost': {'name': 'ghost', 'package_root': '/nonexistent/path/should/never/exist'}, + } + service = self._make_service(logger, skills) + query = make_query() + + mounts = service.build_skill_extra_mounts(query) + + assert mounts == [ + { + 'host_path': live_dir, + 'mount_path': '/workspace/.skills/alive', + 'mode': 'rw', + } + ] + # Warning logged so operators can see what was dropped + assert any( + 'ghost' in str(call.args[0]) and 'package_root missing' in str(call.args[0]) + for call in logger.warning.call_args_list + ) + + def test_skips_skill_with_empty_package_root(self): + logger = Mock() + skills = { + 'no_root': {'name': 'no_root', 'package_root': ''}, + 'whitespace': {'name': 'whitespace', 'package_root': ' '}, + } + service = self._make_service(logger, skills) + + assert service.build_skill_extra_mounts(make_query()) == [] + + def test_returns_empty_when_no_skill_manager(self): + logger = Mock() + app = make_app(logger) + # no skill_mgr attribute + service = BoxService(app, client=Mock(spec=BoxRuntimeClient)) + + assert 
service.build_skill_extra_mounts(make_query()) == [] diff --git a/tests/unit_tests/box/test_workspace.py b/tests/unit_tests/box/test_workspace.py new file mode 100644 index 000000000..809347e56 --- /dev/null +++ b/tests/unit_tests/box/test_workspace.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import os +import tempfile +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +from langbot.pkg.box.workspace import ( + BoxWorkspaceSession, + classify_python_workspace, + infer_workspace_host_path, + rewrite_mounted_path, + wrap_python_command_with_env, +) + + +def test_rewrite_mounted_path_translates_host_prefix(): + result = rewrite_mounted_path('/tmp/demo/project/app.py', '/tmp/demo/project') + assert result == '/workspace/app.py' + + +def test_infer_workspace_host_path_unwraps_virtualenv_bin_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + project_root = os.path.join(tmpdir, 'project') + os.makedirs(os.path.join(project_root, '.venv', 'bin')) + python_bin = os.path.join(project_root, '.venv', 'bin', 'python') + script = os.path.join(project_root, 'server.py') + + with open(python_bin, 'w', encoding='utf-8') as handle: + handle.write('') + with open(script, 'w', encoding='utf-8') as handle: + handle.write('print("ok")\n') + + result = infer_workspace_host_path(python_bin, [script]) + + assert result == os.path.realpath(project_root) + + +def test_classify_python_workspace_detects_package_and_requirements(): + with tempfile.TemporaryDirectory() as tmpdir: + assert classify_python_workspace(tmpdir) is None + + with open(os.path.join(tmpdir, 'requirements.txt'), 'w', encoding='utf-8') as handle: + handle.write('requests\n') + assert classify_python_workspace(tmpdir) == 'requirements' + + with open(os.path.join(tmpdir, 'pyproject.toml'), 'w', encoding='utf-8') as handle: + handle.write('[project]\nname = "demo"\n') + assert classify_python_workspace(tmpdir) == 'package' + + +def 
test_wrap_python_command_with_env_contains_bootstrap_and_command(): + command = wrap_python_command_with_env('python script.py') + + assert 'python -m venv "$_LB_VENV_DIR"' in command + assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command + assert command.rstrip().endswith('python script.py') + + +@pytest.mark.asyncio +async def test_workspace_session_execute_for_query_uses_session_payload(): + box_service = SimpleNamespace(execute_spec_payload=AsyncMock(return_value={'ok': True})) + workspace = BoxWorkspaceSession( + box_service, + 'skill-person_123-demo', + host_path='/tmp/project', + host_path_mode='rw', + env={'FOO': 'bar'}, + ) + + query = SimpleNamespace(query_id='q1') + result = await workspace.execute_for_query(query, 'python run.py', workdir='/workspace', timeout_sec=30) + + assert result == {'ok': True} + payload = box_service.execute_spec_payload.await_args.args[0] + assert payload == { + 'session_id': 'skill-person_123-demo', + 'workdir': '/workspace', + 'env': {'FOO': 'bar'}, + 'persistent': False, + 'host_path': '/tmp/project', + 'host_path_mode': 'rw', + 'cmd': 'python run.py', + 'timeout_sec': 30, + } + + +@pytest.mark.asyncio +async def test_workspace_session_start_managed_process_rewrites_command_and_args(): + box_service = SimpleNamespace(start_managed_process=AsyncMock(return_value={'status': 'running'})) + workspace = BoxWorkspaceSession( + box_service, + 'mcp-u1', + host_path='/tmp/project', + host_path_mode='ro', + ) + + result = await workspace.start_managed_process( + '/tmp/project/.venv/bin/python', + ['/tmp/project/server.py', '--config', '/tmp/project/config.json'], + env={'TOKEN': '1'}, + ) + + assert result == {'status': 'running'} + session_id = box_service.start_managed_process.await_args.args[0] + payload = box_service.start_managed_process.await_args.args[1] + assert session_id == 'mcp-u1' + assert payload == { + 'command': 'python', + 'args': ['/workspace/server.py', '--config', '/workspace/config.json'], + 'env': {'TOKEN': 
'1'}, + 'cwd': '/workspace', + 'process_id': 'default', + } + + +def test_workspace_session_build_session_payload_keeps_generic_workspace_shape(): + workspace = BoxWorkspaceSession( + Mock(), + 'workspace-1', + host_path='/tmp/project', + host_path_mode='rw', + env={'FOO': 'bar'}, + network='on', + read_only_rootfs=False, + image='python:3.11', + cpus=1.0, + memory_mb=512, + pids_limit=128, + ) + + assert workspace.build_session_payload() == { + 'session_id': 'workspace-1', + 'workdir': '/workspace', + 'env': {'FOO': 'bar'}, + 'persistent': False, + 'network': 'on', + 'read_only_rootfs': False, + 'host_path': '/tmp/project', + 'host_path_mode': 'rw', + 'image': 'python:3.11', + 'cpus': 1.0, + 'memory_mb': 512, + 'pids_limit': 128, + } diff --git a/tests/unit_tests/pipeline/conftest.py b/tests/unit_tests/pipeline/conftest.py index a10e0aba1..ce8ee7eb0 100644 --- a/tests/unit_tests/pipeline/conftest.py +++ b/tests/unit_tests/pipeline/conftest.py @@ -12,6 +12,12 @@ import pytest from unittest.mock import AsyncMock, Mock +# Preload pipelinemgr so the pipeline.stage module is fully initialised before +# any individual stage test (e.g. preproc, longtext) tries to import it. Without +# this, running a stage test in isolation triggers a circular-import error: +# stage.py → core.app → pipelinemgr → stage.stage_class (not yet bound). +import langbot.pkg.pipeline.pipelinemgr # noqa: F401 + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query import langbot_plugin.api.entities.builtin.platform.message as platform_message import langbot_plugin.api.entities.builtin.platform.events as platform_events @@ -34,6 +40,9 @@ def __init__(self): self.query_pool = self._create_mock_query_pool() self.instance_config = self._create_mock_instance_config() self.task_mgr = self._create_mock_task_manager() + # Skill manager is optional; PreProcessor only touches it for the + # local-agent runner. None keeps the skill-binding branch inert. 
+ self.skill_mgr = None def _create_mock_logger(self): logger = Mock() diff --git a/tests/unit_tests/pipeline/test_chat_handler_logging.py b/tests/unit_tests/pipeline/test_chat_handler_logging.py new file mode 100644 index 000000000..6ae85558f --- /dev/null +++ b/tests/unit_tests/pipeline/test_chat_handler_logging.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from unittest.mock import Mock + +import pytest +import langbot_plugin.api.entities.builtin.provider.message as provider_message + +# TODO: unskip once the handler ↔ app circular import is resolved +pytest.skip( + 'circular import in handler ↔ app; will be unblocked once resolved', + allow_module_level=True, +) + +from langbot.pkg.pipeline.process.handler import MessageHandler # noqa: E402 + + +class _StubHandler(MessageHandler): + async def handle(self, query): + raise NotImplementedError + + +handler = _StubHandler(ap=Mock()) + + +def test_chat_handler_formats_tool_call_request_log(): + result = provider_message.Message( + role='assistant', + content='', + tool_calls=[ + provider_message.ToolCall( + id='call-1', + type='function', + function=provider_message.FunctionCall(name='exec', arguments='{}'), + ) + ], + ) + + summary = handler.format_result_log(result) + + assert summary == 'assistant: requested tools: exec' + + +def test_chat_handler_formats_tool_result_log(): + result = provider_message.Message( + role='tool', + content='{"status":"completed","exit_code":0,"backend":"podman","stdout":"42\\n"}', + tool_call_id='call-1', + ) + + summary = handler.format_result_log(result) + + # Tool results use generic cut_str truncation + assert summary is not None + assert summary.startswith('tool: {"status":"com') + assert summary.endswith('...') + + +def test_chat_handler_formats_tool_error_log(): + result = provider_message.MessageChunk( + role='tool', + content='err: host_path must point to an existing directory on the host', + tool_call_id='call-1', + is_final=True, + ) + + summary = 
handler.format_result_log(result) + + assert summary is not None + assert summary.startswith('tool error: err: host_path must') + assert summary.endswith('...') + + +def test_chat_handler_skips_empty_assistant_log(): + result = provider_message.Message(role='assistant', content='') + + summary = handler.format_result_log(result) + + assert summary is None diff --git a/tests/unit_tests/pipeline/test_n8nsvapi.py b/tests/unit_tests/pipeline/test_n8nsvapi.py index b9bbcc2da..787472375 100644 --- a/tests/unit_tests/pipeline/test_n8nsvapi.py +++ b/tests/unit_tests/pipeline/test_n8nsvapi.py @@ -14,33 +14,43 @@ import sys from unittest.mock import AsyncMock, MagicMock, Mock, patch -# Break the circular import chain before importing n8nsvapi: +import pytest +import langbot_plugin.api.entities.builtin.provider.message as provider_message + +# Break the circular import chain while importing n8nsvapi: # n8nsvapi → runner → app → pipelinemgr → all runners → runner (partially init) -_mock_runner = MagicMock() -_mock_runner.runner_class = lambda name: (lambda cls: cls) # no-op decorator -_mock_runner.RequestRunner = object -_mocked_imports = { - 'langbot.pkg.provider.runner': _mock_runner, +# The stubs are restored in a ``finally`` block so this module does NOT pollute +# sys.modules for other test modules (e.g. ones importing the real +# LocalAgentRunner, which would otherwise inherit ``object`` and break). +# Mirrors master's intent but uses try/finally so a raised import doesn't +# leave the global namespace in a stubbed state, and includes +# ``langbot.pkg.utils.httpclient`` which master didn't stub. 
+_runner_stub = MagicMock() +_runner_stub.runner_class = lambda name: (lambda cls: cls) # no-op decorator +_runner_stub.RequestRunner = object +_import_stubs = { + 'langbot.pkg.provider.runner': _runner_stub, 'langbot.pkg.core.app': MagicMock(), + 'langbot.pkg.utils.httpclient': MagicMock(), } -_original_imports = {name: sys.modules.get(name) for name in _mocked_imports} -sys.modules.update(_mocked_imports) - -import pytest # noqa: E402 -import langbot_plugin.api.entities.builtin.provider.message as provider_message # noqa: E402 -from langbot.pkg.provider.runners.n8nsvapi import N8nServiceAPIRunner # noqa: E402 - -for _name, _original in _original_imports.items(): - if _original is None: - sys.modules.pop(_name, None) - else: - sys.modules[_name] = _original +_saved_modules = {name: sys.modules.get(name) for name in _import_stubs} +for _name, _stub in _import_stubs.items(): + sys.modules[_name] = _stub +try: + from langbot.pkg.provider.runners.n8nsvapi import N8nServiceAPIRunner +finally: + for _name, _original in _saved_modules.items(): + if _original is None: + sys.modules.pop(_name, None) + else: + sys.modules[_name] = _original # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- + def make_runner(output_key: str = 'response') -> N8nServiceAPIRunner: ap = Mock() ap.logger = Mock() @@ -83,6 +93,7 @@ async def collect_chunks(runner: N8nServiceAPIRunner, chunks: list[bytes | str]) # _process_response: stream format (type:item/end) # --------------------------------------------------------------------------- + @pytest.mark.asyncio async def test_stream_format_single_item(): """Single item + end in one chunk yields final chunk with full content.""" @@ -165,6 +176,7 @@ async def test_stream_format_no_spurious_empty_yield(): # _process_response: plain JSON fallback # --------------------------------------------------------------------------- + 
@pytest.mark.asyncio async def test_plain_json_with_output_key(): """Plain JSON with matching output_key extracts value via output_key.""" @@ -235,6 +247,7 @@ async def test_invalid_json_returns_raw_text(): # _call_webhook: output type depends on is_stream # --------------------------------------------------------------------------- + def make_query(is_stream: bool): """Build a minimal Query mock.""" query = Mock() diff --git a/tests/unit_tests/provider/test_localagent_sandbox_exec.py b/tests/unit_tests/provider/test_localagent_sandbox_exec.py new file mode 100644 index 000000000..daa4eb2dd --- /dev/null +++ b/tests/unit_tests/provider/test_localagent_sandbox_exec.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message +import langbot_plugin.api.entities.builtin.provider.session as provider_session + +from langbot.pkg.provider.runners.localagent import LocalAgentRunner + + +class RecordingProvider: + def __init__(self): + self.requests: list[dict] = [] + + async def invoke_llm(self, query, model, messages, funcs, extra_args=None, remove_think=None): + self.requests.append( + { + 'messages': list(messages), + 'funcs': list(funcs), + 'remove_think': remove_think, + } + ) + + if len(self.requests) == 1: + return provider_message.Message( + role='assistant', + content='Let me calculate that exactly.', + tool_calls=[ + provider_message.ToolCall( + id='call-1', + type='function', + function=provider_message.FunctionCall( + name='exec', + arguments=json.dumps( + {'command': ("python - <<'PY'\nnums = [1, 2, 3, 4]\nprint(sum(nums) / len(nums))\nPY")} + ), + ), + ) + ], + ) + + tool_result = json.loads(messages[-1].content) + return provider_message.Message( + role='assistant', + content=f'The average is 
{tool_result["stdout"]}.', + ) + + +class RecordingStreamProvider: + def __init__(self): + self.stream_requests: list[dict] = [] + + def invoke_llm_stream(self, query, model, messages, funcs, extra_args=None, remove_think=None): + self.stream_requests.append( + { + 'messages': list(messages), + 'funcs': list(funcs), + 'remove_think': remove_think, + } + ) + + async def _stream(): + if len(self.stream_requests) == 1: + yield provider_message.MessageChunk( + role='assistant', + tool_calls=[ + provider_message.ToolCall( + id='call-1', + type='function', + function=provider_message.FunctionCall( + name='exec', + arguments=json.dumps({'command': "python -c 'print(1)'"}), + ), + ) + ], + is_final=True, + ) + return + + yield provider_message.MessageChunk( + role='assistant', + content='Tool execution failed.', + is_final=True, + ) + + return _stream() + + +def make_query() -> pipeline_query.Query: + adapter = AsyncMock() + adapter.is_stream_output_supported = AsyncMock(return_value=False) + + return pipeline_query.Query.model_construct( + query_id='avg-query', + launcher_type=provider_session.LauncherTypes.PERSON, + launcher_id=12345, + sender_id=12345, + message_chain=[], + message_event=None, + adapter=adapter, + pipeline_uuid='pipeline-uuid', + bot_uuid='bot-uuid', + pipeline_config={ + 'ai': { + 'runner': {'runner': 'local-agent'}, + 'local-agent': {'model': {'primary': 'test-model-uuid', 'fallbacks': []}, 'prompt': 'test-prompt'}, + }, + 'output': {'misc': {'remove-think': False}}, + }, + prompt=SimpleNamespace(messages=[]), + messages=[], + user_message=provider_message.Message( + role='user', + content='Please calculate the average of 1, 2, 3, and 4.', + ), + use_funcs=[SimpleNamespace(name='exec')], + use_llm_model_uuid='test-model-uuid', + variables={}, + ) + + +@pytest.mark.asyncio +async def test_localagent_uses_exec_for_exact_calculation(): + provider = RecordingProvider() + model = SimpleNamespace( + provider=provider, + model_entity=SimpleNamespace( + 
uuid='test-model-uuid', + name='test-model', + abilities=['func_call'], + extra_args={}, + ), + ) + + tool_manager = SimpleNamespace( + execute_func_call=AsyncMock( + return_value={ + 'session_id': 'avg-query', + 'backend': 'podman', + 'status': 'completed', + 'ok': True, + 'exit_code': 0, + 'stdout': '2.5', + 'stderr': '', + 'duration_ms': 18, + } + ) + ) + + app = SimpleNamespace( + logger=Mock(), + model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)), + tool_mgr=tool_manager, + rag_mgr=SimpleNamespace(), + box_service=SimpleNamespace( + get_system_guidance=Mock( + return_value=( + 'When the exec tool is available, use it for exact calculations, statistics, ' + 'structured data parsing, and code execution instead of estimating mentally. ' + 'Unless the user explicitly asks for the script, code, or implementation details, ' + 'do not include the generated script in the final answer. ' + 'A default workspace is mounted at /workspace for file tasks.' + ) + ), + ), + skill_mgr=SimpleNamespace( + get_skills_for_pipeline=AsyncMock(return_value=[]), + detect_skill_activation=AsyncMock(return_value=None), + build_activation_prompt=Mock(return_value=None), + ), + ) + + runner = LocalAgentRunner(app, pipeline_config={}) + query = make_query() + + results = [message async for message in runner.run(query)] + + assert [message.role for message in results] == ['assistant', 'tool', 'assistant'] + assert results[-1].content == 'The average is 2.5.' 
+ + tool_manager.execute_func_call.assert_awaited_once() + tool_name, tool_parameters = tool_manager.execute_func_call.await_args.args[:2] + assert tool_name == 'exec' + assert 'print(sum(nums) / len(nums))' in tool_parameters['command'] + + first_request = provider.requests[0] + assert any( + message.role == 'system' + and 'exec' in str(message.content) + and 'exact calculations' in str(message.content) + and 'Unless the user explicitly asks for the script' in str(message.content) + and '/workspace' in str(message.content) + for message in first_request['messages'] + ) + assert [tool.name for tool in first_request['funcs']] == ['exec'] + + +@pytest.mark.asyncio +async def test_localagent_streaming_tool_error_yields_message_chunks(): + provider = RecordingStreamProvider() + model = SimpleNamespace( + provider=provider, + model_entity=SimpleNamespace( + uuid='test-model-uuid', + name='test-model', + abilities=['func_call'], + extra_args={}, + ), + ) + + adapter = AsyncMock() + adapter.is_stream_output_supported = AsyncMock(return_value=True) + + query = make_query() + query.adapter = adapter + + app = SimpleNamespace( + logger=Mock(), + model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)), + tool_mgr=SimpleNamespace(execute_func_call=AsyncMock(side_effect=RuntimeError('boom'))), + rag_mgr=SimpleNamespace(), + box_service=SimpleNamespace( + get_system_guidance=Mock(return_value='sandbox guidance'), + ), + skill_mgr=SimpleNamespace( + get_skills_for_pipeline=AsyncMock(return_value=[]), + detect_skill_activation=AsyncMock(return_value=None), + build_activation_prompt=Mock(return_value=None), + ), + ) + + runner = LocalAgentRunner(app, pipeline_config={}) + + results = [message async for message in runner.run(query)] + + assert all(isinstance(message, provider_message.MessageChunk) for message in results) + assert any(message.role == 'tool' and message.content == 'err: boom' for message in results) diff --git 
a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py new file mode 100644 index 000000000..0123af4b6 --- /dev/null +++ b/tests/unit_tests/provider/test_mcp_box_integration.py @@ -0,0 +1,712 @@ +"""Tests for MCP Box integration: path rewriting, host_path inference, config model, payloads. + +Uses importlib.util.spec_from_file_location to load mcp.py directly without +triggering the circular import chain through the app module. +""" + +from __future__ import annotations + +import importlib +import importlib.util +import os +import sys +import tempfile +import types +from contextlib import asynccontextmanager +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + + +# --------------------------------------------------------------------------- +# Load mcp.py directly from file path, with stub dependencies +# --------------------------------------------------------------------------- + + +def _stub_module(fqn: str, attrs: dict | None = None, is_package: bool = False): + """Create or return a stub module and register it in sys.modules.""" + if fqn in sys.modules: + mod = sys.modules[fqn] + else: + mod = types.ModuleType(fqn) + mod.__spec__ = importlib.machinery.ModuleSpec(fqn, None, is_package=is_package) + if is_package: + mod.__path__ = [] + sys.modules[fqn] = mod + parts = fqn.rsplit('.', 1) + if len(parts) == 2 and parts[0] in sys.modules: + setattr(sys.modules[parts[0]], parts[1], mod) + if attrs: + for k, v in attrs.items(): + setattr(mod, k, v) + return mod + + +@pytest.fixture(scope='module', autouse=True) +def mcp_module(): + """Load mcp.py with minimal stubs to avoid circular imports.""" + saved = {} + + def _save_and_stub(name, attrs=None, is_package=False): + saved[name] = sys.modules.get(name) + # Don't overwrite modules that already exist (from other test modules) + if name in sys.modules: + return + _stub_module(name, attrs, is_package) + + # Stub entire 
dependency chains as packages / modules + _save_and_stub('langbot_plugin', is_package=True) + _save_and_stub('langbot_plugin.api', is_package=True) + _save_and_stub('langbot_plugin.api.entities', is_package=True) + _save_and_stub('langbot_plugin.api.entities.events', is_package=True) + _save_and_stub('langbot_plugin.api.entities.events.pipeline_query', {}) + _save_and_stub('langbot_plugin.api.entities.builtin', is_package=True) + _save_and_stub('langbot_plugin.api.entities.builtin.resource', is_package=True) + _save_and_stub( + 'langbot_plugin.api.entities.builtin.resource.tool', + { + 'LLMTool': type('LLMTool', (), {}), + }, + ) + _save_and_stub('langbot_plugin.api.entities.builtin.provider', is_package=True) + _save_and_stub('langbot_plugin.api.entities.builtin.provider.message', {}) + _save_and_stub('sqlalchemy', {'select': Mock()}) + _save_and_stub('httpx', {'AsyncClient': Mock()}) + _save_and_stub('mcp', {'ClientSession': Mock, 'StdioServerParameters': Mock}, is_package=True) + _save_and_stub('mcp.client', is_package=True) + _save_and_stub('mcp.client.stdio', {'stdio_client': Mock()}) + _save_and_stub('mcp.client.sse', {'sse_client': Mock()}) + _save_and_stub('mcp.client.streamable_http', {'streamable_http_client': Mock()}) + _save_and_stub('mcp.client.websocket', {'websocket_client': Mock()}) + + # Stub the provider.tools.loader (source of circular import) + _save_and_stub('langbot', is_package=True) + _save_and_stub('langbot.pkg', is_package=True) + _save_and_stub('langbot.pkg.provider', is_package=True) + _save_and_stub('langbot.pkg.provider.tools', is_package=True) + _save_and_stub( + 'langbot.pkg.provider.tools.loader', + { + 'ToolLoader': type('ToolLoader', (), {'__init__': lambda self, ap: None}), + }, + ) + _save_and_stub('langbot.pkg.provider.tools.loaders', is_package=True) + _save_and_stub('langbot.pkg.core', is_package=True) + _save_and_stub('langbot.pkg.core.app', {'Application': type('Application', (), {})}) + _save_and_stub('langbot.pkg.entity', 
is_package=True) + _save_and_stub('langbot.pkg.entity.persistence', is_package=True) + _save_and_stub('langbot.pkg.entity.persistence.mcp', {}) + + # box models + import enum as _enum + + class _BPS(str, _enum.Enum): + RUNNING = 'running' + EXITED = 'exited' + + _save_and_stub('langbot_plugin.box', is_package=True) + _save_and_stub('langbot_plugin.box.models', {'BoxManagedProcessStatus': _BPS}) + + # Now load mcp.py via spec_from_file_location + mod_fqn = 'langbot.pkg.provider.tools.loaders.mcp' + sys.modules.pop(mod_fqn, None) + mcp_path = os.path.join( + os.path.dirname(__file__), + '..', + '..', + '..', + 'src', + 'langbot', + 'pkg', + 'provider', + 'tools', + 'loaders', + 'mcp.py', + ) + mcp_path = os.path.normpath(mcp_path) + pkg_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(mcp_path)))) + sys.modules['langbot.pkg'].__path__ = [pkg_root] + sys.modules['langbot.pkg.provider.tools.loaders'].__path__ = [os.path.dirname(mcp_path)] + spec = importlib.util.spec_from_file_location(mod_fqn, mcp_path) + mod = importlib.util.module_from_spec(spec) + sys.modules[mod_fqn] = mod + spec.loader.exec_module(mod) + + yield mod + + # Cleanup + sys.modules.pop(mod_fqn, None) + sys.modules.pop('langbot.pkg.provider.tools.loaders.mcp_stdio', None) + sys.modules.pop('langbot.pkg.box.workspace', None) + for name in reversed(list(saved)): + if saved[name] is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = saved[name] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_ap(): + ap = Mock() + ap.logger = Mock() + ap.box_service = Mock() + return ap + + +def _make_session(mcp_module, server_config: dict, ap=None): + if ap is None: + ap = _make_ap() + return mcp_module.RuntimeMCPSession( + server_name=server_config.get('name', 'test-server'), + server_config=server_config, + enable=True, + ap=ap, + ) + + +# ── 
MCPServerBoxConfig ────────────────────────────────────────────── + + +class TestMCPServerBoxConfig: + def test_default_values(self, mcp_module): + cfg = mcp_module.MCPServerBoxConfig.model_validate({}) + assert cfg.image is None + assert cfg.network == 'on' + assert cfg.host_path is None + assert cfg.host_path_mode == 'ro' + assert cfg.env == {} + assert cfg.startup_timeout_sec == 120 + assert cfg.cpus is None + assert cfg.memory_mb is None + assert cfg.pids_limit is None + assert cfg.read_only_rootfs is None + + def test_custom_values(self, mcp_module): + cfg = mcp_module.MCPServerBoxConfig.model_validate( + { + 'image': 'node:20', + 'network': 'on', + 'host_path': '/home/user/mcp', + 'host_path_mode': 'rw', + 'env': {'FOO': 'bar'}, + 'startup_timeout_sec': 60, + 'cpus': 2.0, + 'memory_mb': 1024, + 'pids_limit': 256, + 'read_only_rootfs': False, + } + ) + assert cfg.image == 'node:20' + assert cfg.network == 'on' + assert cfg.cpus == 2.0 + assert cfg.memory_mb == 1024 + + def test_extra_fields_ignored(self, mcp_module): + cfg = mcp_module.MCPServerBoxConfig.model_validate( + { + 'image': 'node:20', + 'unknown_field': 'whatever', + } + ) + assert cfg.image == 'node:20' + assert not hasattr(cfg, 'unknown_field') + + +# ── Path Rewriting ────────────────────────────────────────────────── + + +class TestRewritePath: + def test_no_host_path_returns_unchanged(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + assert s._rewrite_path('/some/path', None) == '/some/path' + + def test_empty_path_returns_empty(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + assert s._rewrite_path('', '/home/user/mcp') == '' + + def test_prefix_match_rewrites(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 
'python', + 'args': [], + }, + ) + result = s._rewrite_path('/home/user/mcp/server.py', '/home/user/mcp') + assert result == '/workspace/server.py' + + def test_exact_match_rewrites_to_workspace(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + result = s._rewrite_path('/home/user/mcp', '/home/user/mcp') + assert result == '/workspace' + + def test_non_matching_path_unchanged(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + result = s._rewrite_path('/opt/other/server.py', '/home/user/mcp') + assert result == '/opt/other/server.py' + + def test_similar_prefix_not_rewritten(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + result = s._rewrite_path('/home/user/mcp-other/file.py', '/home/user/mcp') + assert result == '/home/user/mcp-other/file.py' + + def test_nested_subpath_rewrites(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + result = s._rewrite_path('/home/user/mcp/src/lib/main.py', '/home/user/mcp') + assert result == '/workspace/src/lib/main.py' + + +# ── host_path Inference ───────────────────────────────────────────── + + +class TestInferHostPath: + def test_no_absolute_paths_returns_none(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': ['server.py'], + }, + ) + assert s._infer_host_path() is None + + def test_nonexistent_path_returns_none(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': '/nonexistent/path/to/python', + 'args': [], + }, + ) + assert s._infer_host_path() is 
None + + def test_existing_absolute_path_infers_directory(self, mcp_module): + with tempfile.NamedTemporaryFile(suffix='.py') as f: + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [f.name], + }, + ) + result = s._infer_host_path() + assert result is not None + assert result == os.path.dirname(os.path.realpath(f.name)) + + +# ── Build Box Session Payload ─────────────────────────────────────── + + +class TestBuildBoxSessionPayload: + def test_minimal_config(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + payload = s._build_box_session_payload('session-123') + assert payload['session_id'] == 'session-123' + assert payload['workdir'] == '/workspace' + assert payload['env'] == {} + assert 'host_path' not in payload + + def test_with_host_path(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + 'box': {'host_path': '/home/user/mcp', 'host_path_mode': 'ro'}, + }, + ) + payload = s._build_box_session_payload('session-123') + assert payload['host_path'] == '/home/user/mcp' + assert payload['host_path_mode'] == 'ro' + + def test_optional_fields_included_when_set(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + 'box': {'image': 'node:20', 'cpus': 2.0, 'memory_mb': 1024, 'pids_limit': 256}, + }, + ) + payload = s._build_box_session_payload('session-123') + assert payload['image'] == 'node:20' + assert payload['cpus'] == 2.0 + assert payload['memory_mb'] == 1024 + assert payload['pids_limit'] == 256 + + def test_none_fields_excluded(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + payload = 
s._build_box_session_payload('session-123') + assert 'image' not in payload + assert 'cpus' not in payload + + +# ── Build Box Process Payload ─────────────────────────────────────── + + +class TestBuildBoxProcessPayload: + def test_basic_payload(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': ['server.py'], + 'env': {'KEY': 'val'}, + }, + ) + payload = s._build_box_process_payload() + assert payload['command'] == 'python' + assert payload['args'] == ['server.py'] + assert payload['env'] == {'KEY': 'val'} + assert payload['cwd'] == '/workspace' + + def test_path_rewriting_applied(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': '/home/user/mcp/venv/bin/python', + 'args': ['/home/user/mcp/server.py', '--config', '/home/user/mcp/config.json'], + 'env': {}, + 'box': {'host_path': '/home/user/mcp'}, + }, + ) + payload = s._build_box_process_payload() + # venv python is replaced with plain 'python' (deps installed in-container) + assert payload['command'] == 'python' + assert payload['args'] == ['/workspace/server.py', '--config', '/workspace/config.json'] + + def test_non_matching_args_not_rewritten(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': ['/opt/other/server.py', '--flag'], + 'env': {}, + 'box': {'host_path': '/home/user/mcp'}, + }, + ) + payload = s._build_box_process_payload() + assert payload['command'] == 'python' + assert payload['args'] == ['/opt/other/server.py', '--flag'] + + +# ── get_runtime_info_dict ─────────────────────────────────────────── + + +class TestGetRuntimeInfoDict: + def test_non_stdio_session(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'test-uuid', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + info = 
s.get_runtime_info_dict() + assert info['status'] == 'connecting' + assert 'box_session_id' not in info + + def test_runtime_tools_include_parameters(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'test-uuid', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + s.functions = [ + SimpleNamespace( + name='create-service', + description='Create a service', + parameters={ + 'type': 'object', + 'properties': { + 'project_id': {'type': 'string'}, + }, + 'required': ['project_id'], + }, + ) + ] + + info = s.get_runtime_info_dict() + + assert info['tools'][0]['parameters']['properties']['project_id']['type'] == 'string' + assert info['tools'][0]['parameters']['required'] == ['project_id'] + + def test_stdio_session_includes_box_info(self, mcp_module): + ap = _make_ap() + ap.box_service.available = True + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'test-uuid', + 'mode': 'stdio', + 'command': 'python', + 'args': [], + }, + ap=ap, + ) + info = s.get_runtime_info_dict() + assert info['box_session_id'] == 'mcp-shared' + assert info['box_enabled'] is True + + def test_stdio_session_refuses_when_box_unavailable(self, mcp_module): + """Policy: when Box is configured but unavailable (disabled in config + OR connection failed), stdio MCP servers are NOT treated as box-stdio. 
+ ``_init_stdio_python_server`` will raise a clear refusal at start + time; until then, the runtime info simply omits box_session_id so the + UI can render the disabled state cleanly.""" + ap = _make_ap() + ap.box_service.available = False + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'test-uuid', + 'mode': 'stdio', + 'command': 'python', + 'args': [], + }, + ap=ap, + ) + info = s.get_runtime_info_dict() + assert 'box_session_id' not in info + assert 'box_enabled' not in info + + def test_stdio_session_without_box_service_uses_local_stdio(self, mcp_module): + ap = _make_ap() + del ap.box_service + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'test-uuid', + 'mode': 'stdio', + 'command': 'python', + 'args': [], + }, + ap=ap, + ) + info = s.get_runtime_info_dict() + assert 'box_session_id' not in info + + +# ── Box config parsing ────────────────────────────────────────────── + + +class TestBoxConfigParsing: + def test_box_config_parsed_from_server_config(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + 'box': {'image': 'node:20', 'host_path': '/home/user/mcp'}, + }, + ) + assert isinstance(s.box_config, mcp_module.MCPServerBoxConfig) + assert s.box_config.image == 'node:20' + assert s.box_config.host_path == '/home/user/mcp' + + def test_missing_box_key_uses_defaults(self, mcp_module): + s = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'sse', + 'command': 'python', + 'args': [], + }, + ) + assert isinstance(s.box_config, mcp_module.MCPServerBoxConfig) + assert s.box_config.image is None + assert s.box_config.host_path_mode == 'ro' + + +@pytest.mark.asyncio +async def test_init_box_stdio_server_stages_host_path_in_shared_workspace(mcp_module, tmp_path): + mcp_stdio_module = sys.modules['langbot.pkg.provider.tools.loaders.mcp_stdio'] + + class FakeClientSession: + def __init__(self, *_args): + pass + 
+ async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def initialize(self): + return None + + @asynccontextmanager + async def fake_websocket_client(_url: str): + yield ('read-stream', 'write-stream') + + mcp_stdio_module.ClientSession = FakeClientSession + mcp_stdio_module.websocket_client = fake_websocket_client + + ap = _make_ap() + ap.box_service.available = True + ap.box_service.default_workspace = str(tmp_path / 'shared-box-workspace') + ap.box_service.create_session = AsyncMock(return_value={}) + ap.box_service.build_spec = Mock(return_value='validated-spec') + ap.box_service.client = SimpleNamespace( + execute=AsyncMock(return_value=SimpleNamespace(ok=True, stderr='', exit_code=0)) + ) + ap.box_service.start_managed_process = AsyncMock(return_value={}) + ap.box_service.get_managed_process_websocket_url = Mock(return_value='ws://box.example/process') + + host_path = tmp_path / 'mcp-source' + host_path.mkdir() + server_file = host_path / 'server.py' + server_file.write_text('print("hello")\n', encoding='utf-8') + + session = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'u1', + 'mode': 'stdio', + 'command': str(host_path / '.venv' / 'bin' / 'python'), + 'args': [str(server_file)], + 'box': {'host_path': str(host_path)}, + }, + ap=ap, + ) + + await session._init_box_stdio_server() + await session.exit_stack.aclose() + + assert ap.box_service.create_session.await_count == 1 + session_payload = ap.box_service.create_session.await_args.args[0] + assert session_payload['session_id'] == 'mcp-shared' + assert 'host_path' not in session_payload + assert ap.box_service.build_spec.call_count == 1 + assert ap.box_service.build_spec.call_args.kwargs.get('skip_host_mount_validation', False) is False + assert ap.box_service.build_spec.call_args.args[0]['host_path'] == str(host_path) + + staged_file = tmp_path / 'shared-box-workspace' / '.mcp' / 'u1' / 'workspace' / 'server.py' + assert 
staged_file.read_text(encoding='utf-8') == 'print("hello")\n' + + process_payload = ap.box_service.start_managed_process.await_args.args[1] + assert process_payload['process_id'] == 'u1' + assert process_payload['command'] == 'python' + assert process_payload['args'] == ['/workspace/.mcp/u1/workspace/server.py'] + assert process_payload['cwd'] == '/workspace/.mcp/u1/workspace' diff --git a/tests/unit_tests/provider/test_model_service.py b/tests/unit_tests/provider/test_model_service.py index 344cfe398..60ac658e4 100644 --- a/tests/unit_tests/provider/test_model_service.py +++ b/tests/unit_tests/provider/test_model_service.py @@ -169,6 +169,7 @@ async def test_updated_llm_model_is_immediately_usable_by_local_agent_pipeline() ap.logger = Mock() ap.persistence_mgr = SimpleNamespace(execute_async=AsyncMock()) ap.tool_mgr = SimpleNamespace(get_all_tools=AsyncMock(return_value=[])) + ap.skill_mgr = None # PreProcessor only uses skill_mgr for the local-agent skill-binding branch ap.plugin_connector = SimpleNamespace( emit_event=AsyncMock(return_value=SimpleNamespace(event=SimpleNamespace(default_prompt=[], prompt=[]))) ) diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py new file mode 100644 index 000000000..00e04bfa0 --- /dev/null +++ b/tests/unit_tests/provider/test_skill_tools.py @@ -0,0 +1,479 @@ +from __future__ import annotations + +import os +import tempfile +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + + +def _make_ap(logger=None): + ap = SimpleNamespace() + ap.logger = logger or Mock() + ap.persistence_mgr = Mock() + ap.persistence_mgr.execute_async = AsyncMock(return_value=Mock(all=Mock(return_value=[]))) + ap.persistence_mgr.serialize_model = Mock(side_effect=lambda cls, row: row) + return ap + + +def _make_skill_data( + name='test-skill', + instructions='Do something', + package_root='', + entry_file='SKILL.md', + **kwargs, +): + return { + 'name': name, 
+ 'display_name': kwargs.pop('display_name', name), + 'description': kwargs.pop('description', f'Description of {name}'), + 'instructions': instructions, + 'package_root': package_root, + 'entry_file': entry_file, + **kwargs, + } + + +class TestSkillManagerCache: + """The Box runtime is the only source of truth — SkillManager just holds + an in-memory cache populated by ``reload_skills``. There is no local + filesystem reader anymore.""" + + def test_refresh_skill_from_disk_reports_cache_presence(self): + """Box is the only source of truth for skill content. refresh_skill_from_disk + now just reports whether the skill is still in the in-memory cache — + the actual content refresh is driven by SkillService awaiting + ``reload_skills`` after every Box mutation.""" + from langbot.pkg.skill.manager import SkillManager + + ap = _make_ap() + mgr = SkillManager(ap) + + # Empty cache → returns False + assert mgr.refresh_skill_from_disk('test-skill') is False + + # Cache populated → returns True; method does NOT mutate the cache + cached = _make_skill_data(name='test-skill', instructions='Cached') + mgr.skills['test-skill'] = cached + assert mgr.refresh_skill_from_disk('test-skill') is True + assert mgr.skills['test-skill'] is cached + assert mgr.refresh_skill_from_disk('') is False + + @pytest.mark.asyncio + async def test_reload_skills_drops_box_skills_with_missing_package_root(self): + """When Box reports a skill whose package_root is gone from the + LangBot-visible filesystem, the cache must drop it instead of + keeping a stale entry that would later produce a bad mount.""" + from langbot.pkg.skill.manager import SkillManager + + with tempfile.TemporaryDirectory() as live_dir: + ghost_dir = os.path.join(live_dir, '_does_not_exist') + box_service = SimpleNamespace( + available=True, + list_skills=AsyncMock( + return_value=[ + _make_skill_data(name='alive', package_root=live_dir), + _make_skill_data(name='ghost', package_root=ghost_dir), + ] + ), + ) + + ap = _make_ap() + 
ap.box_service = box_service + mgr = SkillManager(ap) + + await mgr.reload_skills() + + assert list(mgr.skills) == ['alive'] + # Warning fired with the dropped skill name so operators can see it. + warning_messages = [str(call.args[0]) for call in ap.logger.warning.call_args_list] + assert any('ghost' in msg and 'package_root missing' in msg for msg in warning_messages) + + +class TestSkillActivationHelper: + """Skill activation is now Tool-Call based. + + The legacy text-marker mechanism (``[ACTIVATE_SKILL: x]`` detection, + ``build_activation_prompt_for_skills``, ``remove_activation_marker``, + ``prepare_skill_activation``) has been removed. Activation now goes + through ``skill.activation.register_activated_skill``, invoked by the + ``activate`` Tool Call. + """ + + def test_register_activated_skill_records_known_skill(self): + from langbot.pkg.skill.activation import register_activated_skill + from langbot.pkg.provider.tools.loaders.skill import ACTIVATED_SKILLS_KEY + from langbot.pkg.skill.manager import SkillManager + + ap = _make_ap() + mgr = SkillManager(ap) + mgr.skills = { + 'primary': _make_skill_data(name='primary', instructions='Primary instructions'), + } + ap.skill_mgr = mgr + + query = SimpleNamespace(variables={}) + + assert register_activated_skill(ap, query, 'primary') is True + assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'primary'} + assert query.variables[ACTIVATED_SKILLS_KEY]['primary']['name'] == 'primary' + + def test_register_activated_skill_rejects_unknown_skill(self): + from langbot.pkg.skill.activation import register_activated_skill + from langbot.pkg.provider.tools.loaders.skill import ACTIVATED_SKILLS_KEY + from langbot.pkg.skill.manager import SkillManager + + ap = _make_ap() + mgr = SkillManager(ap) + mgr.skills = {'primary': _make_skill_data(name='primary')} + ap.skill_mgr = mgr + + query = SimpleNamespace(variables={}) + + assert register_activated_skill(ap, query, 'missing') is False + assert ACTIVATED_SKILLS_KEY 
not in query.variables + + def test_register_activated_skill_without_skill_manager_returns_false(self): + from langbot.pkg.skill.activation import register_activated_skill + + ap = _make_ap() # no skill_mgr attribute + query = SimpleNamespace(variables={}) + + assert register_activated_skill(ap, query, 'primary') is False + + +class TestSkillPathHelpers: + def test_get_visible_skills_filters_by_bound_names(self): + from langbot.pkg.provider.tools.loaders.skill import PIPELINE_BOUND_SKILLS_KEY, get_visible_skills + + ap = _make_ap() + ap.skill_mgr = SimpleNamespace( + skills={ + 'visible': _make_skill_data(name='visible'), + 'hidden': _make_skill_data(name='hidden'), + } + ) + query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']}) + + result = get_visible_skills(ap, query) + + assert list(result.keys()) == ['visible'] + + def test_resolve_virtual_skill_path_allows_visible_skill_reads(self): + from langbot.pkg.provider.tools.loaders.skill import ( + PIPELINE_BOUND_SKILLS_KEY, + resolve_virtual_skill_path, + ) + + ap = _make_ap() + ap.skill_mgr = SimpleNamespace(skills={'demo': _make_skill_data(name='demo')}) + query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}) + + skill, rewritten = resolve_virtual_skill_path( + ap, + query, + '/workspace/.skills/demo/SKILL.md', + include_visible=True, + include_activated=False, + ) + + assert skill['name'] == 'demo' + assert rewritten == '/workspace/SKILL.md' + + def test_build_skill_session_id_uses_name_based_identifier(self): + from langbot.pkg.provider.tools.loaders.skill import build_skill_session_id + + with_launcher = build_skill_session_id( + {'name': 'writer'}, + SimpleNamespace(query_id=42, launcher_type='person', launcher_id='123'), + ) + fallback = build_skill_session_id({'name': 'writer'}, SimpleNamespace(query_id=99)) + + assert with_launcher == 'skill-person_123-writer' + assert fallback == 'skill-99-writer' + + def 
test_should_prepare_skill_python_env_detects_manifests_and_venv(self): + from langbot.pkg.provider.tools.loaders.skill import should_prepare_skill_python_env + + with tempfile.TemporaryDirectory() as tmpdir: + assert should_prepare_skill_python_env(tmpdir) is False + + with open(os.path.join(tmpdir, 'requirements.txt'), 'w', encoding='utf-8') as f: + f.write('requests==2.32.0\n') + assert should_prepare_skill_python_env(tmpdir) is True + + with tempfile.TemporaryDirectory() as tmpdir: + os.makedirs(os.path.join(tmpdir, '.venv')) + assert should_prepare_skill_python_env(tmpdir) is True + + def test_wrap_skill_command_with_python_env_bootstraps_then_runs_command(self): + from langbot.pkg.provider.tools.loaders.skill import wrap_skill_command_with_python_env + + command = wrap_skill_command_with_python_env('python scripts/run.py') + + assert 'python -m venv "$_LB_VENV_DIR"' in command + assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command + assert command.rstrip().endswith('python scripts/run.py') + + +class TestSkillToolLoader: + """The skill tool surface is now just ``activate`` + ``register_skill``. + + The legacy CRUD authoring tools (create/list/get/update/delete/ + import_skill_from_directory/reload_skills) were removed; skill CRUD is + handled by SkillService via the HTTP API / web UI instead. 
+ """ + + @pytest.mark.asyncio + async def test_activate_returns_instructions_and_registers_skill(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import ( + ACTIVATE_SKILL_TOOL_NAME, + SkillToolLoader, + ) + from langbot.pkg.provider.tools.loaders.skill import ACTIVATED_SKILLS_KEY + + skill = _make_skill_data(name='demo', package_root='/data/skills/demo', instructions='Step 1') + ap = _make_ap() + ap.skill_mgr = SimpleNamespace( + skills={'demo': skill}, + get_skill_by_name=lambda name: skill if name == 'demo' else None, + ) + + loader = SkillToolLoader(ap) + query = SimpleNamespace(variables={}) + + result = await loader.invoke_tool(ACTIVATE_SKILL_TOOL_NAME, {'skill_name': 'demo'}, query) + + assert result['activated'] is True + assert result['skill_name'] == 'demo' + assert result['mount_path'] == '/workspace/.skills/demo' + assert 'Step 1' in result['content'] + assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'} + + @pytest.mark.asyncio + async def test_activate_unknown_skill_raises(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import ( + ACTIVATE_SKILL_TOOL_NAME, + SkillToolLoader, + ) + + ap = _make_ap() + ap.skill_mgr = SimpleNamespace( + skills={'demo': _make_skill_data(name='demo')}, + get_skill_by_name=lambda name: None, + ) + + loader = SkillToolLoader(ap) + + with pytest.raises(ValueError, match='not found'): + await loader.invoke_tool( + ACTIVATE_SKILL_TOOL_NAME, + {'skill_name': 'ghost'}, + SimpleNamespace(variables={}), + ) + + @pytest.mark.asyncio + async def test_register_skill_scans_directory_and_creates_skill(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import ( + REGISTER_SKILL_TOOL_NAME, + SkillToolLoader, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_dir = os.path.join(tmpdir, 'repo') + os.makedirs(repo_dir) + + ap = _make_ap() + ap.box_service = SimpleNamespace(default_workspace=tmpdir, available=True) + ap.skill_service = SimpleNamespace( + 
scan_directory_async=AsyncMock( + return_value={ + 'name': 'cloned-skill', + 'display_name': 'Cloned Skill', + 'description': 'Imported from clone', + 'instructions': 'Do work', + } + ), + create_skill=AsyncMock( + return_value=_make_skill_data(name='cloned-skill', package_root=os.path.realpath(repo_dir)) + ), + ) + + loader = SkillToolLoader(ap) + result = await loader.invoke_tool( + REGISTER_SKILL_TOOL_NAME, + {'path': '/workspace/repo'}, + SimpleNamespace(), + ) + + ap.skill_service.scan_directory_async.assert_awaited_once_with(os.path.realpath(repo_dir)) + ap.skill_service.create_skill.assert_awaited_once_with( + { + 'name': 'cloned-skill', + 'display_name': 'Cloned Skill', + 'description': 'Imported from clone', + 'instructions': 'Do work', + 'package_root': os.path.realpath(repo_dir), + } + ) + assert result['registered'] is True + assert result['skill_name'] == 'cloned-skill' + assert result['source_path'] == '/workspace/repo' + + @pytest.mark.asyncio + async def test_register_skill_rejects_workspace_escape(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import ( + REGISTER_SKILL_TOOL_NAME, + SkillToolLoader, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + ap = _make_ap() + ap.box_service = SimpleNamespace(default_workspace=tmpdir, available=True) + ap.skill_service = SimpleNamespace(scan_directory_async=AsyncMock(), create_skill=AsyncMock()) + + loader = SkillToolLoader(ap) + + with pytest.raises(ValueError, match='escapes the workspace boundary'): + await loader.invoke_tool( + REGISTER_SKILL_TOOL_NAME, + {'path': '/workspace/../../etc'}, + SimpleNamespace(), + ) + + @pytest.mark.asyncio + async def test_register_skill_requires_skill_service(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import ( + REGISTER_SKILL_TOOL_NAME, + SkillToolLoader, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + ap = _make_ap() # no skill_service attribute + ap.box_service = SimpleNamespace(default_workspace=tmpdir, 
available=True) + + loader = SkillToolLoader(ap) + + with pytest.raises(ValueError, match='Skill service not available'): + await loader.invoke_tool( + REGISTER_SKILL_TOOL_NAME, + {'path': '/workspace/foo'}, + SimpleNamespace(), + ) + + @pytest.mark.asyncio + async def test_tools_hidden_when_sandbox_backend_unavailable(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import SkillToolLoader + + ap = _make_ap() + ap.skill_mgr = SimpleNamespace(skills={}) + ap.box_service = SimpleNamespace( + available=True, + get_status=AsyncMock(return_value={'backend': {'available': False}}), + ) + + loader = SkillToolLoader(ap) + await loader.initialize() + + assert await loader.get_tools() == [] + assert await loader.has_tool('activate') is False + assert await loader.has_tool('register_skill') is False + + @pytest.mark.asyncio + async def test_tools_exposed_when_sandbox_backend_available(self): + from langbot.pkg.provider.tools.loaders.skill_authoring import SkillToolLoader + + ap = _make_ap() + ap.skill_mgr = SimpleNamespace(skills={'demo': _make_skill_data(name='demo')}) + ap.box_service = SimpleNamespace( + available=True, + get_status=AsyncMock(return_value={'backend': {'available': True}}), + ) + + loader = SkillToolLoader(ap) + await loader.initialize() + + tools = await loader.get_tools() + + assert sorted(tool.name for tool in tools) == ['activate', 'register_skill'] + assert await loader.has_tool('activate') is True + assert await loader.has_tool('register_skill') is True + + +class TestNativeToolLoaderSkillPaths: + @pytest.mark.asyncio + async def test_read_visible_skill_file(self): + from langbot.pkg.provider.tools.loaders.native import NativeToolLoader + from langbot.pkg.provider.tools.loaders.skill import PIPELINE_BOUND_SKILLS_KEY + + with tempfile.TemporaryDirectory() as tmpdir: + skill_md = os.path.join(tmpdir, 'SKILL.md') + with open(skill_md, 'w', encoding='utf-8') as f: + f.write('demo instructions') + + ap = _make_ap() + ap.box_service = 
SimpleNamespace(available=True, default_workspace=tmpdir) + ap.skill_mgr = SimpleNamespace(skills={'demo': _make_skill_data(name='demo', package_root=tmpdir)}) + loader = NativeToolLoader(ap) + + result = await loader.invoke_tool( + 'read', + {'path': '/workspace/.skills/demo/SKILL.md'}, + SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}), + ) + + assert result == {'ok': True, 'content': 'demo instructions'} + + @pytest.mark.asyncio + async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self): + from langbot.pkg.provider.tools.loaders.native import NativeToolLoader + from langbot.pkg.provider.tools.loaders.skill import register_activated_skill + + with tempfile.TemporaryDirectory() as tmpdir: + ap = _make_ap() + ap.box_service = SimpleNamespace( + available=True, + default_workspace=tmpdir, + execute_tool=AsyncMock(return_value={'ok': True}), + ) + ap.skill_mgr = SimpleNamespace(refresh_skill_from_disk=Mock()) + loader = NativeToolLoader(ap) + + query = SimpleNamespace(query_id='q1', launcher_type='person', launcher_id='123', variables={}) + register_activated_skill(query, _make_skill_data(name='demo', package_root=tmpdir)) + + result = await loader.invoke_tool( + 'exec', + { + 'command': 'python /workspace/.skills/demo/scripts/run.py', + 'workdir': '/workspace/.skills/demo', + }, + query, + ) + + assert result == {'ok': True} + tool_parameters = ap.box_service.execute_tool.await_args.args[0] + assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py' + assert tool_parameters['workdir'] == '/workspace/.skills/demo' + ap.skill_mgr.refresh_skill_from_disk.assert_called_once_with('demo') + + @pytest.mark.asyncio + async def test_write_requires_skill_activation(self): + from langbot.pkg.provider.tools.loaders.native import NativeToolLoader + from langbot.pkg.provider.tools.loaders.skill import PIPELINE_BOUND_SKILLS_KEY + + with tempfile.TemporaryDirectory() as tmpdir: + ap = _make_ap() + 
ap.box_service = SimpleNamespace(available=True, default_workspace=tmpdir) + ap.skill_mgr = SimpleNamespace(skills={'demo': _make_skill_data(name='demo', package_root=tmpdir)}) + loader = NativeToolLoader(ap) + + query = SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}) + + with pytest.raises(ValueError, match='Skill "demo" is not available at this path'): + await loader.invoke_tool( + 'write', + {'path': '/workspace/.skills/demo/notes.txt', 'content': 'hi'}, + query, + ) diff --git a/tests/unit_tests/provider/test_tool_manager.py b/tests/unit_tests/provider/test_tool_manager.py index 867b2e221..8e8439f52 100644 --- a/tests/unit_tests/provider/test_tool_manager.py +++ b/tests/unit_tests/provider/test_tool_manager.py @@ -4,6 +4,7 @@ - Tool schema generation for OpenAI and Anthropic - Tool execution dispatch """ + from __future__ import annotations import pytest @@ -52,11 +53,12 @@ def mock_app(self): @pytest.fixture def sample_tools(self): """Create sample LLMTool list for testing.""" + def dummy_weather_func(**kwargs): - return "weather result" + return 'weather result' def dummy_calc_func(**kwargs): - return "calc result" + return 'calc result' tools = [ resource_tool.LLMTool( @@ -65,15 +67,10 @@ def dummy_calc_func(**kwargs): description='Get current weather for a location', parameters={ 'type': 'object', - 'properties': { - 'location': { - 'type': 'string', - 'description': 'City name' - } - }, - 'required': ['location'] + 'properties': {'location': {'type': 'string', 'description': 'City name'}}, + 'required': ['location'], }, - func=dummy_weather_func + func=dummy_weather_func, ), resource_tool.LLMTool( name='calculate', @@ -81,15 +78,10 @@ def dummy_calc_func(**kwargs): description='Perform a calculation', parameters={ 'type': 'object', - 'properties': { - 'expression': { - 'type': 'string', - 'description': 'Math expression' - } - }, - 'required': ['expression'] + 'properties': {'expression': {'type': 'string', 'description': 
'Math expression'}}, + 'required': ['expression'], }, - func=dummy_calc_func + func=dummy_calc_func, ), ] return tools @@ -188,26 +180,48 @@ class TestToolManagerExecuteFuncCall: @pytest.fixture def mock_app_with_loaders(self): - """Create mock app with mock tool loaders.""" + """Create mock app with mock tool loaders. + + Returns (app, plugin_loader, mcp_loader). The native and skill loaders + are attached directly to the app for tests that don't need to assert + against them — they all default to ``has_tool == False`` so the + execute_func_call probe falls through to the plugin/mcp pair. + """ mock_app = Mock() mock_app.logger = Mock() + def _make_inert_loader(): + loader = Mock() + loader.has_tool = AsyncMock(return_value=False) + loader.invoke_tool = AsyncMock(return_value=None) + loader.initialize = AsyncMock() + loader.shutdown = AsyncMock() + return loader + # Create mock plugin loader - mock_plugin_loader = Mock() - mock_plugin_loader.has_tool = AsyncMock(return_value=False) + mock_plugin_loader = _make_inert_loader() mock_plugin_loader.invoke_tool = AsyncMock(return_value='plugin_result') - mock_plugin_loader.initialize = AsyncMock() - mock_plugin_loader.shutdown = AsyncMock() # Create mock MCP loader - mock_mcp_loader = Mock() - mock_mcp_loader.has_tool = AsyncMock(return_value=False) + mock_mcp_loader = _make_inert_loader() mock_mcp_loader.invoke_tool = AsyncMock(return_value='mcp_result') - mock_mcp_loader.initialize = AsyncMock() - mock_mcp_loader.shutdown = AsyncMock() + + # Stash inert native/skill loaders so the ToolManager probe order + # (native → plugin → mcp → skill) doesn't AttributeError. Tests that + # need to override these can replace the attributes on the manager. 
+ mock_app._inert_native_loader = _make_inert_loader() + mock_app._inert_skill_loader = _make_inert_loader() return mock_app, mock_plugin_loader, mock_mcp_loader + @staticmethod + def _wire_loaders(manager, mock_app, plugin_loader, mcp_loader): + """Attach all four loaders (native + plugin + mcp + skill) to manager.""" + manager.native_tool_loader = mock_app._inert_native_loader + manager.plugin_tool_loader = plugin_loader + manager.mcp_tool_loader = mcp_loader + manager.skill_tool_loader = mock_app._inert_skill_loader + @pytest.fixture def sample_query(self): """Create sample query for testing.""" @@ -215,9 +229,7 @@ def sample_query(self): return query @pytest.mark.asyncio - async def test_execute_calls_plugin_loader_when_has_tool( - self, mock_app_with_loaders, sample_query - ): + async def test_execute_calls_plugin_loader_when_has_tool(self, mock_app_with_loaders, sample_query): """Test that execute_func_call uses plugin loader when tool exists there.""" toolmgr = get_toolmgr_module() @@ -225,26 +237,17 @@ async def test_execute_calls_plugin_loader_when_has_tool( mock_plugin_loader.has_tool = AsyncMock(return_value=True) manager = toolmgr.ToolManager(mock_app) - manager.plugin_tool_loader = mock_plugin_loader - manager.mcp_tool_loader = mock_mcp_loader + self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader) - result = await manager.execute_func_call( - 'test_tool', - {'param': 'value'}, - sample_query - ) + result = await manager.execute_func_call('test_tool', {'param': 'value'}, sample_query) assert result == 'plugin_result' - mock_plugin_loader.invoke_tool.assert_called_once_with( - 'test_tool', {'param': 'value'}, sample_query - ) + mock_plugin_loader.invoke_tool.assert_called_once_with('test_tool', {'param': 'value'}, sample_query) # MCP loader should not be called mock_mcp_loader.invoke_tool.assert_not_called() @pytest.mark.asyncio - async def test_execute_calls_mcp_loader_when_plugin_not_found( - self, mock_app_with_loaders, 
sample_query - ): + async def test_execute_calls_mcp_loader_when_plugin_not_found(self, mock_app_with_loaders, sample_query): """Test that execute_func_call uses MCP loader when plugin doesn't have tool.""" toolmgr = get_toolmgr_module() @@ -253,24 +256,15 @@ async def test_execute_calls_mcp_loader_when_plugin_not_found( mock_mcp_loader.has_tool = AsyncMock(return_value=True) manager = toolmgr.ToolManager(mock_app) - manager.plugin_tool_loader = mock_plugin_loader - manager.mcp_tool_loader = mock_mcp_loader + self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader) - result = await manager.execute_func_call( - 'test_tool', - {'param': 'value'}, - sample_query - ) + result = await manager.execute_func_call('test_tool', {'param': 'value'}, sample_query) assert result == 'mcp_result' - mock_mcp_loader.invoke_tool.assert_called_once_with( - 'test_tool', {'param': 'value'}, sample_query - ) + mock_mcp_loader.invoke_tool.assert_called_once_with('test_tool', {'param': 'value'}, sample_query) @pytest.mark.asyncio - async def test_execute_raises_when_tool_not_found( - self, mock_app_with_loaders, sample_query - ): + async def test_execute_raises_when_tool_not_found(self, mock_app_with_loaders, sample_query): """Test that execute_func_call raises ValueError when tool not found.""" toolmgr = get_toolmgr_module() @@ -279,20 +273,13 @@ async def test_execute_raises_when_tool_not_found( mock_mcp_loader.has_tool = AsyncMock(return_value=False) manager = toolmgr.ToolManager(mock_app) - manager.plugin_tool_loader = mock_plugin_loader - manager.mcp_tool_loader = mock_mcp_loader + self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader) with pytest.raises(ValueError, match='未找到工具'): - await manager.execute_func_call( - 'unknown_tool', - {}, - sample_query - ) + await manager.execute_func_call('unknown_tool', {}, sample_query) @pytest.mark.asyncio - async def test_plugin_loader_checked_first( - self, mock_app_with_loaders, sample_query - ): + async 
def test_plugin_loader_checked_first(self, mock_app_with_loaders, sample_query): """Test that plugin loader is checked before MCP loader.""" toolmgr = get_toolmgr_module() @@ -302,8 +289,7 @@ async def test_plugin_loader_checked_first( mock_mcp_loader.has_tool = AsyncMock(return_value=True) manager = toolmgr.ToolManager(mock_app) - manager.plugin_tool_loader = mock_plugin_loader - manager.mcp_tool_loader = mock_mcp_loader + self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader) await manager.execute_func_call('test_tool', {}, sample_query) @@ -317,20 +303,30 @@ class TestToolManagerShutdown: @pytest.mark.asyncio async def test_shutdown_calls_loader_shutdown(self): - """Test that shutdown calls shutdown on both loaders.""" + """Test that shutdown calls shutdown on every registered loader.""" toolmgr = get_toolmgr_module() mock_app = Mock() - mock_plugin_loader = Mock() - mock_plugin_loader.shutdown = AsyncMock() - mock_mcp_loader = Mock() - mock_mcp_loader.shutdown = AsyncMock() + + def _make_loader(): + loader = Mock() + loader.shutdown = AsyncMock() + return loader + + mock_native_loader = _make_loader() + mock_plugin_loader = _make_loader() + mock_mcp_loader = _make_loader() + mock_skill_loader = _make_loader() manager = toolmgr.ToolManager(mock_app) + manager.native_tool_loader = mock_native_loader manager.plugin_tool_loader = mock_plugin_loader manager.mcp_tool_loader = mock_mcp_loader + manager.skill_tool_loader = mock_skill_loader await manager.shutdown() + mock_native_loader.shutdown.assert_called_once() mock_plugin_loader.shutdown.assert_called_once() - mock_mcp_loader.shutdown.assert_called_once() \ No newline at end of file + mock_mcp_loader.shutdown.assert_called_once() + mock_skill_loader.shutdown.assert_called_once() diff --git a/tests/unit_tests/provider/test_tool_manager_native.py b/tests/unit_tests/provider/test_tool_manager_native.py new file mode 100644 index 000000000..117a20fd3 --- /dev/null +++ 
b/tests/unit_tests/provider/test_tool_manager_native.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +import os +import tempfile +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool + +from langbot.pkg.provider.tools.loaders.native import NativeToolLoader +from langbot.pkg.provider.tools.toolmgr import ToolManager + + +class StubLoader: + def __init__(self, tools: list[resource_tool.LLMTool] | None = None, invoke_result=None): + self._tools = tools or [] + self._invoke_result = invoke_result + + async def get_tools(self, *_args, **_kwargs): + return self._tools + + async def has_tool(self, name: str) -> bool: + return any(tool.name == name for tool in self._tools) + + async def invoke_tool(self, name: str, parameters: dict, query): + return self._invoke_result(name, parameters, query) if callable(self._invoke_result) else self._invoke_result + + async def shutdown(self): + return None + + +def make_tool(name: str) -> resource_tool.LLMTool: + return resource_tool.LLMTool( + name=name, + human_desc=name, + description=name, + parameters={'type': 'object', 'properties': {}}, + func=lambda parameters: parameters, + ) + + +@pytest.mark.asyncio +async def test_tool_manager_omits_skill_authoring_tools_by_default(): + manager = ToolManager(SimpleNamespace()) + manager.native_tool_loader = StubLoader([make_tool('exec')]) + manager.skill_tool_loader = StubLoader([make_tool('activate')]) + manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')]) + manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')]) + + tools = await manager.get_all_tools() + + assert [tool.name for tool in tools] == ['exec', 'plugin_tool', 'mcp_tool'] + + +@pytest.mark.asyncio +async def test_tool_manager_includes_skill_authoring_tools_when_requested(): + manager = ToolManager(SimpleNamespace()) + manager.native_tool_loader = StubLoader([make_tool('exec')]) + 
manager.skill_tool_loader = StubLoader([make_tool('activate')]) + manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')]) + manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')]) + + tools = await manager.get_all_tools(include_skill_authoring=True) + + assert [tool.name for tool in tools] == ['exec', 'activate', 'plugin_tool', 'mcp_tool'] + + +@pytest.mark.asyncio +async def test_tool_manager_routes_native_tool_calls(): + app = SimpleNamespace() + manager = ToolManager(app) + manager.native_tool_loader = StubLoader([make_tool('exec')], invoke_result={'backend': 'fake'}) + manager.skill_tool_loader = StubLoader([make_tool('activate')]) + manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')]) + manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')]) + + result = await manager.execute_func_call('exec', {'command': 'pwd'}, query=Mock()) + + assert result == {'backend': 'fake'} + + +@pytest.mark.asyncio +async def test_native_tool_loader_hides_tools_when_box_unavailable(): + loader = NativeToolLoader(SimpleNamespace(box_service=SimpleNamespace(available=False))) + + assert await loader.get_tools() == [] + for tool_name in ('exec', 'read', 'write', 'edit', 'glob', 'grep'): + assert await loader.has_tool(tool_name) is False + + +@pytest.mark.asyncio +async def test_native_tool_loader_exposes_all_tools_when_box_available(): + box_service = SimpleNamespace( + available=True, + get_status=AsyncMock(return_value={'backend': {'available': True}}), + ) + loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock())) + await loader.initialize() + + tools = await loader.get_tools() + + assert [tool.name for tool in tools] == ['exec', 'read', 'write', 'edit', 'glob', 'grep'] + for tool_name in ('exec', 'read', 'write', 'edit', 'glob', 'grep'): + assert await loader.has_tool(tool_name) is True + + +# ── read/write/edit file tool tests ───────────────────────────── + + +def _make_loader_with_workspace(tmpdir: str) -> 
tuple[NativeToolLoader, Mock]: + logger = Mock() + box_service = SimpleNamespace(available=True, default_workspace=tmpdir) + ap = SimpleNamespace(box_service=box_service, logger=logger) + return NativeToolLoader(ap), logger + + +def _make_query() -> Mock: + q = Mock() + q.query_id = 'test-query-1' + return q + + +@pytest.mark.asyncio +async def test_read_file(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + with open(os.path.join(tmpdir, 'hello.txt'), 'w') as f: + f.write('hello world') + + result = await loader.invoke_tool('read', {'path': '/workspace/hello.txt'}, _make_query()) + + assert result['ok'] is True + assert result['content'] == 'hello world' + + +@pytest.mark.asyncio +async def test_read_nonexistent_file(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + + result = await loader.invoke_tool('read', {'path': '/workspace/no_such.txt'}, _make_query()) + + assert result['ok'] is False + assert 'not found' in result['error'].lower() + + +@pytest.mark.asyncio +async def test_read_directory(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + os.makedirs(os.path.join(tmpdir, 'subdir')) + with open(os.path.join(tmpdir, 'a.txt'), 'w') as f: + f.write('a') + + result = await loader.invoke_tool('read', {'path': '/workspace'}, _make_query()) + + assert result['ok'] is True + assert result['is_directory'] is True + assert 'a.txt' in result['content'] + + +@pytest.mark.asyncio +async def test_write_creates_file(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + + result = await loader.invoke_tool( + 'write', {'path': '/workspace/new.txt', 'content': 'new content'}, _make_query() + ) + + assert result['ok'] is True + with open(os.path.join(tmpdir, 'new.txt')) as f: + assert f.read() == 'new content' + + +@pytest.mark.asyncio +async def 
test_write_creates_subdirectories(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + + result = await loader.invoke_tool( + 'write', {'path': '/workspace/sub/deep/file.txt', 'content': 'nested'}, _make_query() + ) + + assert result['ok'] is True + with open(os.path.join(tmpdir, 'sub', 'deep', 'file.txt')) as f: + assert f.read() == 'nested' + + +@pytest.mark.asyncio +async def test_edit_replaces_unique_string(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + with open(os.path.join(tmpdir, 'code.py'), 'w') as f: + f.write('def foo():\n return 1\n') + + result = await loader.invoke_tool( + 'edit', + {'path': '/workspace/code.py', 'old_string': 'return 1', 'new_string': 'return 42'}, + _make_query(), + ) + + assert result['ok'] is True + with open(os.path.join(tmpdir, 'code.py')) as f: + assert f.read() == 'def foo():\n return 42\n' + + +@pytest.mark.asyncio +async def test_edit_rejects_ambiguous_match(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + with open(os.path.join(tmpdir, 'dup.txt'), 'w') as f: + f.write('aaa\naaa\n') + + result = await loader.invoke_tool( + 'edit', + {'path': '/workspace/dup.txt', 'old_string': 'aaa', 'new_string': 'bbb'}, + _make_query(), + ) + + assert result['ok'] is False + assert '2' in result['error'] + + +@pytest.mark.asyncio +async def test_edit_rejects_missing_string(): + with tempfile.TemporaryDirectory() as tmpdir: + loader, _ = _make_loader_with_workspace(tmpdir) + with open(os.path.join(tmpdir, 'x.txt'), 'w') as f: + f.write('hello') + + result = await loader.invoke_tool( + 'edit', + {'path': '/workspace/x.txt', 'old_string': 'nope', 'new_string': 'yes'}, + _make_query(), + ) + + assert result['ok'] is False + assert 'not found' in result['error'].lower() + + +@pytest.mark.asyncio +async def test_path_escape_blocked(): + with tempfile.TemporaryDirectory() as tmpdir: + 
loader, _ = _make_loader_with_workspace(tmpdir) + + with pytest.raises(ValueError, match='escapes'): + await loader.invoke_tool('read', {'path': '/workspace/../../etc/passwd'}, _make_query()) diff --git a/tests/unit_tests/test_paths.py b/tests/unit_tests/test_paths.py new file mode 100644 index 000000000..c1e84f443 --- /dev/null +++ b/tests/unit_tests/test_paths.py @@ -0,0 +1,23 @@ +from pathlib import Path + +from src.langbot.pkg.utils import paths + + +def test_get_data_root_uses_source_root_in_repo_checkout(): + data_root = Path(paths.get_data_root()) + repo_root = Path(__file__).resolve().parents[2] + + assert data_root == repo_root / 'data' + + +def test_get_data_path_joins_under_data_root(): + data_path = Path(paths.get_data_path('skills', 'demo-skill')) + repo_root = Path(__file__).resolve().parents[2] + + assert data_path == repo_root / 'data' / 'skills' / 'demo-skill' + + +def test_get_data_root_honors_env_override(monkeypatch, tmp_path): + monkeypatch.setenv('LANGBOT_DATA_ROOT', str(tmp_path / 'custom-data')) + + assert Path(paths.get_data_root()) == (tmp_path / 'custom-data').resolve() diff --git a/tests/unit_tests/test_preproc.py b/tests/unit_tests/test_preproc.py new file mode 100644 index 000000000..3164f35b8 --- /dev/null +++ b/tests/unit_tests/test_preproc.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import importlib +import sys +import types +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +from langbot_plugin.api.entities.builtin.pipeline.query import Query +from langbot_plugin.api.entities.builtin.platform.entities import Friend +from langbot_plugin.api.entities.builtin.platform.events import FriendMessage +from langbot_plugin.api.entities.builtin.platform.message import MessageChain, Plain +from langbot_plugin.api.entities.builtin.provider.message import Message +from langbot_plugin.api.entities.builtin.provider.prompt import Prompt +from 
langbot_plugin.api.entities.builtin.provider.session import Conversation, LauncherTypes, Session + + +def _make_query() -> Query: + message_chain = MessageChain([Plain(text='create a skill')]) + return Query( + query_id=1, + launcher_type=LauncherTypes.PERSON, + launcher_id='launcher-1', + sender_id='sender-1', + message_event=FriendMessage( + message_chain=message_chain, + time=0, + sender=Friend(id='sender-1', nickname='Tester', remark='Tester'), + ), + message_chain=message_chain, + bot_uuid='bot-1', + pipeline_uuid='pipe-1', + pipeline_config={ + 'ai': { + 'runner': {'runner': 'local-agent'}, + 'local-agent': { + 'model': {'primary': 'model-1', 'fallbacks': []}, + 'prompt': 'default', + 'knowledge-bases': [], + }, + }, + 'trigger': {'misc': {}}, + }, + variables={}, + ) + + +def _make_conversation() -> Conversation: + return Conversation( + prompt=Prompt(name='default', messages=[Message(role='system', content='system prompt')]), + messages=[], + pipeline_uuid='pipe-1', + bot_uuid='bot-1', + uuid='conv-1', + ) + + +def _make_app(*, skill_service) -> SimpleNamespace: + session = Session(launcher_type=LauncherTypes.PERSON, launcher_id='launcher-1', sender_id='sender-1') + conversation = _make_conversation() + model = SimpleNamespace(model_entity=SimpleNamespace(uuid='model-1', abilities={'func_call'})) + tool_mgr = SimpleNamespace(get_all_tools=AsyncMock(return_value=[])) + + return SimpleNamespace( + sess_mgr=SimpleNamespace( + get_session=AsyncMock(return_value=session), + get_conversation=AsyncMock(return_value=conversation), + ), + model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)), + tool_mgr=tool_mgr, + plugin_connector=SimpleNamespace( + emit_event=AsyncMock( + return_value=SimpleNamespace( + event=SimpleNamespace( + default_prompt=conversation.prompt.messages.copy(), + prompt=conversation.messages.copy(), + ) + ) + ) + ), + pipeline_service=SimpleNamespace( + get_pipeline=AsyncMock(return_value={'extensions_preferences': 
{'enable_all_skills': True}}) + ), + skill_mgr=SimpleNamespace( + build_skill_aware_prompt_addition=Mock(return_value=''), + skills={}, + ), + skill_service=skill_service, + logger=Mock(), + ) + + +def _import_preproc_modules(): + fake_app_module = types.ModuleType('langbot.pkg.core.app') + fake_app_module.Application = object + sys.modules['langbot.pkg.core.app'] = fake_app_module + + for module_name in ( + 'langbot.pkg.pipeline.preproc.preproc', + 'langbot.pkg.pipeline.stage', + ): + sys.modules.pop(module_name, None) + + preproc_module = importlib.import_module('langbot.pkg.pipeline.preproc.preproc') + entities_module = importlib.import_module('langbot.pkg.pipeline.entities') + return preproc_module, entities_module + + +@pytest.mark.asyncio +async def test_preproc_enables_skill_authoring_tools_when_skill_service_available(): + preproc_module, entities_module = _import_preproc_modules() + + app = _make_app(skill_service=SimpleNamespace()) + stage = preproc_module.PreProcessor(app) + + result = await stage.process(_make_query(), 'PreProcessor') + + assert result.result_type == entities_module.ResultType.CONTINUE + app.tool_mgr.get_all_tools.assert_awaited_once_with(None, None, include_skill_authoring=True) + + +@pytest.mark.asyncio +async def test_preproc_disables_skill_authoring_tools_when_skill_service_missing(): + preproc_module, entities_module = _import_preproc_modules() + + app = _make_app(skill_service=None) + stage = preproc_module.PreProcessor(app) + + result = await stage.process(_make_query(), 'PreProcessor') + + assert result.result_type == entities_module.ResultType.CONTINUE + app.tool_mgr.get_all_tools.assert_awaited_once_with(None, None, include_skill_authoring=False) + + +@pytest.mark.asyncio +async def test_preproc_injects_skill_index_into_system_prompt(): + """The Tool Call activation pattern still needs the LLM to know which + skills exist. 
PreProcessor must append the SkillManager's index + addendum to the first system message.""" + preproc_module, entities_module = _import_preproc_modules() + + app = _make_app(skill_service=SimpleNamespace()) + addendum = '\n\nAvailable Skills:\n- demo (demo): Demo skill.\n\nCall activate ...' + app.skill_mgr.build_skill_aware_prompt_addition = Mock(return_value=addendum) + + query = _make_query() + result = await stage_process_capture(preproc_module, app, query) + + assert result.result_type == entities_module.ResultType.CONTINUE + app.skill_mgr.build_skill_aware_prompt_addition.assert_called_once_with(bound_skills=None) + head = query.prompt.messages[0] + assert head.role == 'system' + assert head.content.endswith(addendum) + + +@pytest.mark.asyncio +async def test_preproc_respects_pipeline_bound_skills_subset(): + """When ``enable_all_skills`` is false the bound list is passed through + so the addendum only mentions skills allowed for this pipeline.""" + preproc_module, entities_module = _import_preproc_modules() + + app = _make_app(skill_service=SimpleNamespace()) + app.pipeline_service.get_pipeline = AsyncMock( + return_value={ + 'extensions_preferences': { + 'enable_all_skills': False, + 'skills': ['only-this'], + } + } + ) + app.skill_mgr.build_skill_aware_prompt_addition = Mock(return_value='') + + query = _make_query() + result = await stage_process_capture(preproc_module, app, query) + + assert result.result_type == entities_module.ResultType.CONTINUE + app.skill_mgr.build_skill_aware_prompt_addition.assert_called_once_with(bound_skills=['only-this']) + assert query.variables.get('_pipeline_bound_skills') == ['only-this'] + + +@pytest.mark.asyncio +async def test_preproc_skips_injection_when_addendum_is_empty(): + """No visible skills → system prompt is left untouched (no + ``Available Skills`` block appended).""" + preproc_module, entities_module = _import_preproc_modules() + + app = _make_app(skill_service=SimpleNamespace()) + 
app.skill_mgr.build_skill_aware_prompt_addition = Mock(return_value='') + + query = _make_query() + result = await stage_process_capture(preproc_module, app, query) + + assert result.result_type == entities_module.ResultType.CONTINUE + if query.prompt and query.prompt.messages: + assert 'Available Skills' not in (query.prompt.messages[0].content or '') + + +async def stage_process_capture(preproc_module, app, query): + """Run PreProcessor.process and return the result while keeping ``query`` + accessible to the assertions (process mutates query in place).""" + stage = preproc_module.PreProcessor(app) + return await stage.process(query, 'PreProcessor') diff --git a/tests/unit_tests/test_skill_service.py b/tests/unit_tests/test_skill_service.py new file mode 100644 index 000000000..6fd7d64f2 --- /dev/null +++ b/tests/unit_tests/test_skill_service.py @@ -0,0 +1,89 @@ +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from langbot.pkg.api.http.service.skill import SkillService + + +class TestRequireBoxForWrite: + """Box is the only source of truth for skills — there is no local + filesystem fallback. 
Every write and (most) read methods refuse cleanly + when the Box runtime is disabled, unreachable, or simply not installed.""" + + def _ap_with_disabled_box(self): + return SimpleNamespace( + skill_mgr=SimpleNamespace(reload_skills=AsyncMock()), + box_service=SimpleNamespace( + available=False, + enabled=False, + _connector_error='Box runtime is disabled in config (box.enabled = false)', + ), + ) + + def _ap_with_failed_box(self): + return SimpleNamespace( + skill_mgr=SimpleNamespace(reload_skills=AsyncMock()), + box_service=SimpleNamespace( + available=False, + enabled=True, + _connector_error='docker daemon not running', + ), + ) + + @pytest.mark.asyncio + async def test_create_skill_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='disabled in config'): + await service.create_skill({'name': 'x'}) + + @pytest.mark.asyncio + async def test_create_skill_refused_when_box_failed(self): + service = SkillService(self._ap_with_failed_box()) + with pytest.raises(ValueError, match='docker daemon not running'): + await service.create_skill({'name': 'x'}) + + @pytest.mark.asyncio + async def test_update_skill_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Editing a skill requires the Box runtime'): + await service.update_skill('x', {}) + + @pytest.mark.asyncio + async def test_write_skill_file_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Editing skill files requires the Box runtime'): + await service.write_skill_file('x', 'a.txt', 'hi') + + @pytest.mark.asyncio + async def test_install_from_github_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Installing a skill from GitHub'): + await service.install_from_github({'owner': 'o', 'repo': 'r', 'asset_url': 
'https://example/x.zip'}) + + @pytest.mark.asyncio + async def test_install_from_zip_upload_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Installing a skill from upload'): + await service.install_from_zip_upload(file_bytes=b'', filename='x.zip') + + @pytest.mark.asyncio + async def test_create_skill_refused_when_box_service_missing_entirely(self): + """No ap.box_service attribute at all (truly minimal setup): + Box is the only source of truth, so creation must still refuse.""" + service = SkillService(SimpleNamespace(skill_mgr=SimpleNamespace(reload_skills=AsyncMock()))) + with pytest.raises(ValueError, match='not initialised'): + await service.create_skill({'name': 'x'}) + + @pytest.mark.asyncio + async def test_list_skills_returns_empty_when_box_unavailable(self): + """list_skills should render an empty surface (not crash) so the + skills page can show a banner instead of a broken state.""" + service = SkillService(self._ap_with_disabled_box()) + assert await service.list_skills() == [] + + @pytest.mark.asyncio + async def test_read_skill_file_refused_when_box_unavailable(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Reading a skill file'): + await service.read_skill_file('x', 'a.txt') diff --git a/tests/unit_tests/utils/test_paths.py b/tests/unit_tests/utils/test_paths.py index 390c82702..0043a3338 100644 --- a/tests/unit_tests/utils/test_paths.py +++ b/tests/unit_tests/utils/test_paths.py @@ -11,7 +11,6 @@ import os import pytest -from unittest.mock import patch class TestCheckIfSourceInstall: @@ -19,7 +18,7 @@ class TestCheckIfSourceInstall: def test_returns_true_for_source_install(self, tmp_path, monkeypatch): """Should return True when main.py with LangBot marker exists.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n# This is the entry point') 
monkeypatch.chdir(tmp_path) @@ -33,52 +32,14 @@ def test_returns_true_for_source_install(self, tmp_path, monkeypatch): paths._is_source_install = None - def test_returns_false_when_no_main_py(self, tmp_path, monkeypatch): - """Should return False when main.py doesn't exist.""" - monkeypatch.chdir(tmp_path) - - from langbot.pkg.utils import paths - - paths._is_source_install = None - - result = paths._check_if_source_install() - assert result is False - - paths._is_source_install = None - - def test_returns_false_when_main_py_without_marker(self, tmp_path, monkeypatch): - """Should return False when main.py exists but lacks LangBot marker.""" - main_py = tmp_path / "main.py" - main_py.write_text('# Some other project\nprint("hello")') - - monkeypatch.chdir(tmp_path) - - from langbot.pkg.utils import paths - - paths._is_source_install = None - - result = paths._check_if_source_install() - assert result is False - - paths._is_source_install = None - - def test_handles_io_error_gracefully(self, tmp_path, monkeypatch): - """Should return False when main.py cannot be read.""" - main_py = tmp_path / "main.py" - main_py.write_text('# LangBot/main.py\n') - - monkeypatch.chdir(tmp_path) - - from langbot.pkg.utils import paths - - paths._is_source_install = None - - # Patch open to raise IOError - with patch("builtins.open", side_effect=IOError("Cannot read")): - result = paths._check_if_source_install() - assert result is False - - paths._is_source_install = None + # Note: ``_check_if_source_install`` was refactored to walk + # ``Path(__file__).resolve().parents`` looking for ``pyproject.toml`` + + # ``main.py`` instead of relying on the cwd. That makes it robust to where + # the process is launched from but also means the old "cwd doesn't have + # main.py" / "main.py without marker" / "IOError on read" cases no longer + # apply — there's no file read at all. 
The corresponding negative tests + # were removed; ``test_returns_true_for_source_install`` still exercises + # the positive path because the repo checkout itself is a source install. class TestGetFrontendPath: @@ -92,16 +53,16 @@ def test_returns_web_dist_by_default(self): result = paths.get_frontend_path() # The result should contain web/dist or be an absolute path to it - assert "web/dist" in result or result.endswith("dist") + assert 'web/dist' in result or result.endswith('dist') paths._is_source_install = None def test_finds_dist_directory_in_source_mode(self, tmp_path, monkeypatch): """Should find web/dist when running from source mode.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n') - web_dist = tmp_path / "web" / "dist" + web_dist = tmp_path / 'web' / 'dist' web_dist.mkdir(parents=True) monkeypatch.chdir(tmp_path) @@ -111,18 +72,18 @@ def test_finds_dist_directory_in_source_mode(self, tmp_path, monkeypatch): paths._is_source_install = None result = paths.get_frontend_path() - assert result == "web/dist" + assert result == 'web/dist' paths._is_source_install = None def test_prefers_dist_over_out_in_source_mode(self, tmp_path, monkeypatch): """Should prefer web/dist over web/out when both exist in source mode.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n') - web_dist = tmp_path / "web" / "dist" + web_dist = tmp_path / 'web' / 'dist' web_dist.mkdir(parents=True) - web_out = tmp_path / "web" / "out" + web_out = tmp_path / 'web' / 'out' web_out.mkdir(parents=True) monkeypatch.chdir(tmp_path) @@ -132,7 +93,7 @@ def test_prefers_dist_over_out_in_source_mode(self, tmp_path, monkeypatch): paths._is_source_install = None result = paths.get_frontend_path() - assert result == "web/dist" + assert result == 'web/dist' paths._is_source_install = None @@ -148,19 +109,19 @@ def test_returns_original_path_when_not_found(self, tmp_path, monkeypatch): 
paths._is_source_install = None - result = paths.get_resource_path("nonexistent/file.txt") - assert result == "nonexistent/file.txt" + result = paths.get_resource_path('nonexistent/file.txt') + assert result == 'nonexistent/file.txt' paths._is_source_install = None def test_finds_resource_in_current_directory_source_mode(self, tmp_path, monkeypatch): """Should find resource in current directory when in source mode.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n') - resource_file = tmp_path / "templates" / "config.yaml" + resource_file = tmp_path / 'templates' / 'config.yaml' resource_file.parent.mkdir(parents=True, exist_ok=True) - resource_file.write_text("test: value") + resource_file.write_text('test: value') monkeypatch.chdir(tmp_path) @@ -168,18 +129,18 @@ def test_finds_resource_in_current_directory_source_mode(self, tmp_path, monkeyp paths._is_source_install = None - result = paths.get_resource_path("templates/config.yaml") + result = paths.get_resource_path('templates/config.yaml') assert os.path.exists(result) paths._is_source_install = None def test_returns_relative_path_in_source_mode(self, tmp_path, monkeypatch): """Should return relative path if resource exists in source mode.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n') - resource_file = tmp_path / "test_resource.txt" - resource_file.write_text("test content") + resource_file = tmp_path / 'test_resource.txt' + resource_file.write_text('test content') monkeypatch.chdir(tmp_path) @@ -187,8 +148,8 @@ def test_returns_relative_path_in_source_mode(self, tmp_path, monkeypatch): paths._is_source_install = None - result = paths.get_resource_path("test_resource.txt") - assert result == "test_resource.txt" + result = paths.get_resource_path('test_resource.txt') + assert result == 'test_resource.txt' paths._is_source_install = None @@ -198,7 +159,7 @@ class TestPathFunctionsCaching: def 
test_source_install_cache_is_used(self, tmp_path, monkeypatch): """_check_if_source_install should use cached result.""" - main_py = tmp_path / "main.py" + main_py = tmp_path / 'main.py' main_py.write_text('# LangBot/main.py\n') monkeypatch.chdir(tmp_path) @@ -219,5 +180,5 @@ def test_source_install_cache_is_used(self, tmp_path, monkeypatch): paths._is_source_install = None -if __name__ == "__main__": - pytest.main([__file__, "-v"]) \ No newline at end of file +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/tests/unit_tests/utils/test_version.py b/tests/unit_tests/utils/test_version.py deleted file mode 100644 index df698caf8..000000000 --- a/tests/unit_tests/utils/test_version.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Unit tests for version utility functions. - -Tests version comparison logic without network calls. -""" - -from __future__ import annotations - -from unittest.mock import Mock - -from langbot.pkg.utils.version import VersionManager - - -class TestVersionComparison: - """Tests for version comparison functions.""" - - def _create_version_manager(self): - """Create a VersionManager with mock app.""" - mock_app = Mock() - mock_app.proxy_mgr = Mock() - mock_app.proxy_mgr.get_forward_providers = Mock(return_value={}) - mock_app.logger = Mock() - return VersionManager(mock_app) - - def test_is_newer_same_version(self): - """is_newer returns False for same version.""" - vm = self._create_version_manager() - result = vm.is_newer('v1.0.0', 'v1.0.0') - assert result is False - - def test_is_newer_different_major_version(self): - """is_newer returns False for different major version.""" - # Note: is_newer ignores major version changes - vm = self._create_version_manager() - result = vm.is_newer('v2.0.0', 'v1.0.0') - assert result is False - - def test_is_newer_minor_update(self): - """is_newer returns True for minor update within same major.""" - vm = self._create_version_manager() - result = vm.is_newer('v1.1.0', 'v1.0.0') - assert 
result is True - - def test_is_newer_patch_update(self): - """is_newer returns True for patch update within same major.""" - vm = self._create_version_manager() - result = vm.is_newer('v1.0.1', 'v1.0.0') - assert result is True - - def test_is_newer_with_fourth_segment(self): - """is_newer ignores fourth version segment.""" - # Both have same first 3 segments - vm = self._create_version_manager() - result = vm.is_newer('v1.0.0.1', 'v1.0.0.0') - assert result is False - - def test_is_newer_short_version(self): - """is_newer handles short version numbers.""" - vm = self._create_version_manager() - result = vm.is_newer('v1.0', 'v1.0') - assert result is False - - def test_is_newer_older_version(self): - """is_newer returns True when new > old.""" - vm = self._create_version_manager() - result = vm.is_newer('v1.2.0', 'v1.1.0') - assert result is True - - -class TestCompareVersionStr: - """Tests for compare_version_str static method.""" - - def test_compare_equal_versions(self): - """Equal versions return 0.""" - result = VersionManager.compare_version_str('v1.0.0', 'v1.0.0') - assert result == 0 - - def test_compare_without_v_prefix(self): - """Versions without v prefix work the same.""" - result = VersionManager.compare_version_str('1.0.0', '1.0.0') - assert result == 0 - - def test_compare_mixed_prefix(self): - """Mixed v prefix works correctly.""" - result = VersionManager.compare_version_str('v1.0.0', '1.0.0') - assert result == 0 - - def test_compare_first_greater(self): - """First version greater returns 1.""" - result = VersionManager.compare_version_str('v1.1.0', 'v1.0.0') - assert result == 1 - - def test_compare_first_smaller(self): - """First version smaller returns -1.""" - result = VersionManager.compare_version_str('v1.0.0', 'v1.1.0') - assert result == -1 - - def test_compare_different_lengths(self): - """Different length versions are padded with zeros.""" - result = VersionManager.compare_version_str('v1.0', 'v1.0.0') - assert result == 0 - - def 
test_compare_shorter_greater(self): - """Shorter version padded, first still greater.""" - result = VersionManager.compare_version_str('v1.1', 'v1.0.0') - assert result == 1 - - def test_compare_longer_greater(self): - """Longer version, first smaller.""" - result = VersionManager.compare_version_str('v1.0', 'v1.0.1') - assert result == -1 - - def test_compare_major_version(self): - """Major version comparison.""" - result = VersionManager.compare_version_str('v2.0.0', 'v1.9.9') - assert result == 1 - - def test_compare_minor_version(self): - """Minor version comparison.""" - result = VersionManager.compare_version_str('v1.5.0', 'v1.4.9') - assert result == 1 - - def test_compare_patch_version(self): - """Patch version comparison.""" - result = VersionManager.compare_version_str('v1.0.1', 'v1.0.0') - assert result == 1 - - def test_compare_four_segments(self): - """Four segment version comparison.""" - result = VersionManager.compare_version_str('v1.0.0.1', 'v1.0.0.0') - assert result == 1 - - def test_compare_long_versions(self): - """Long version strings work correctly.""" - result = VersionManager.compare_version_str('v1.2.3.4.5', 'v1.2.3.4.4') - assert result == 1 diff --git a/uv.lock b/uv.lock index fc56bbbc0..0c40474fa 100644 --- a/uv.lock +++ b/uv.lock @@ -560,6 +560,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/71/9a2c88abb5fe47b46168b262254d5b5d635de371eba4bd01ea5c8c109575/botocore-1.42.39-py3-none-any.whl", hash = "sha256:9e0d0fed9226449cc26fcf2bbffc0392ac698dd8378e8395ce54f3ec13f81d58", size = 14591958, upload-time = "2026-01-30T20:38:14.814Z" }, ] +[[package]] +name = "bracex" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/9a/fec38644694abfaaeca2798b58e276a8e61de49e2e37494ace423395febc/bracex-2.6.tar.gz", hash = "sha256:98f1347cd77e22ee8d967a30ad4e310b233f7754dbf31ff3fceb76145ba47dc7", size = 26642, upload-time = "2025-06-22T19:12:31.254Z" } +wheels 
= [ + { url = "https://files.pythonhosted.org/packages/9d/2a/9186535ce58db529927f6cf5990a849aa9e052eea3e2cfefe20b9e1802da/bracex-2.6-py3-none-any.whl", hash = "sha256:0b0049264e7340b3ec782b5cb99beb325f36c3782a32e36e876452fd49a09952", size = 11508, upload-time = "2025-06-22T19:12:29.781Z" }, +] + [[package]] name = "build" version = "1.4.0" @@ -1086,6 +1095,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] +[[package]] +name = "dockerfile-parse" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/df/929ee0b5d2c8bd8d713c45e71b94ab57c7e11e322130724d54f469b2cd48/dockerfile-parse-2.0.1.tar.gz", hash = "sha256:3184ccdc513221983e503ac00e1aa504a2aa8f84e5de673c46b0b6eee99ec7bc", size = 24556, upload-time = "2023-07-18T13:36:07.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/6c/79cd5bc1b880d8c1a9a5550aa8dacd57353fa3bb2457227e1fb47383eb49/dockerfile_parse-2.0.1-py2.py3-none-any.whl", hash = "sha256:bdffd126d2eb26acf1066acb54cb2e336682e1d72b974a40894fac76a4df17f6", size = 14845, upload-time = "2023-07-18T13:36:06.052Z" }, +] + [[package]] name = "docstring-parser" version = "0.17.0" @@ -1115,6 +1133,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, ] +[[package]] +name = "e2b" +version = "2.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "dockerfile-parse" }, + { name = "h2" }, + { name = "httpcore" }, + { name 
= "httpx" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "python-dateutil" }, + { name = "rich" }, + { name = "typing-extensions" }, + { name = "wcmatch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/97/0e86ccb9e05c18e6e795e0808f14e2dc9f5c9ffb7be2a5cb77afd6d9f59e/e2b-2.21.1.tar.gz", hash = "sha256:2eff473ca03173cee1ccd9f9ec9e90c2b4705cca418e080e3104753d7ec33490", size = 157458, upload-time = "2026-05-14T17:36:02.318Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/d4/8b6a9a120e724dd8f91aededa89348a667a01fffbba28ae1a42cb397b0f0/e2b-2.21.1-py3-none-any.whl", hash = "sha256:9ec4646f3dba4a6da855baa8adeab239aa988e15904611e38bf12aeec2562ac9", size = 297476, upload-time = "2026-05-14T17:36:00.351Z" }, +] + [[package]] name = "ebooklib" version = "0.20" @@ -1859,7 +1899,7 @@ wheels = [ [[package]] name = "langbot" -version = "4.9.7" +version = "4.10.0b1" source = { editable = "." } dependencies = [ { name = "aiocqhttp" }, @@ -1973,7 +2013,7 @@ requires-dist = [ { name = "ebooklib", specifier = ">=0.18" }, { name = "gewechat-client", specifier = ">=0.1.5" }, { name = "html2text", specifier = ">=2024.2.26" }, - { name = "langbot-plugin", specifier = "==0.3.11" }, + { name = "langbot-plugin", specifier = "==0.4.0b1" }, { name = "langchain", specifier = ">=0.2.0" }, { name = "langchain-core", specifier = ">=1.2.28" }, { name = "langchain-text-splitters", specifier = ">=1.1.2" }, @@ -2036,11 +2076,13 @@ dev = [ [[package]] name = "langbot-plugin" -version = "0.3.11" +version = "0.4.0b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, + { name = "aiohttp" }, { name = "dotenv" }, + { name = "e2b" }, { name = "httpx" }, { name = "jinja2" }, { name = "pip" }, @@ -2054,9 +2096,9 @@ dependencies = [ { name = "watchdog" }, { name = "websockets" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/91/83/93b86bcdbfe51d820fa59232aaa73cc802d6ce614f67d8f8b33957419538/langbot_plugin-0.3.11.tar.gz", hash = "sha256:8d10c98c771b468b2d35cc007778439c39922a88265fcc16a5881234bc7c1b19", size = 190315, upload-time = "2026-05-12T15:45:24.262Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/e0/4bb2fd08813879d3da390f588b2927ae626edcfd45dca36900e2e54fb23c/langbot_plugin-0.4.0b1.tar.gz", hash = "sha256:f523c197ff9f5aa3db737e29765ebe1f7a8c96f973240ce3769ccccd0bfddde7", size = 216965, upload-time = "2026-05-21T05:23:27.682Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/22/de7977a6a5cbf557b80043eb3ed39e5feff24033a5d6db4ab88d48ccb6ea/langbot_plugin-0.3.11-py3-none-any.whl", hash = "sha256:c1d2e84eda1584902d99efa316b850c08c1c04fcc199306ff4af1dca1431304a", size = 165574, upload-time = "2026-05-12T15:45:22.908Z" }, + { url = "https://files.pythonhosted.org/packages/a0/59/9c6df7cd652d3434d1139ee8392e170108e5f980046b9a55bff324e094fe/langbot_plugin-0.4.0b1-py3-none-any.whl", hash = "sha256:b533407296399c7693255678a4d1390be957dabffa21ca2982e56d28a728854b", size = 194310, upload-time = "2026-05-21T05:23:26.215Z" }, ] [[package]] @@ -2751,7 +2793,7 @@ wheels = [ [[package]] name = "moto" version = "5.2.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3" }, { name = "botocore" }, @@ -2761,9 +2803,9 @@ dependencies = [ { name = "werkzeug" }, { name = "xmltodict" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/e9/c38202162db2e76623176be9f1dbc9aa41228ffa91ee8da2d3986082c3e3/moto-5.2.1.tar.gz", hash = "sha256:ccb2f3e1dfa82e50e054bda98b0be708d244d2668364dcc1d45e8d3de6091bde", size = 8634437, upload-time = "2026-05-10T19:11:57.286Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/e9/c38202162db2e76623176be9f1dbc9aa41228ffa91ee8da2d3986082c3e3/moto-5.2.1.tar.gz", hash = 
"sha256:ccb2f3e1dfa82e50e054bda98b0be708d244d2668364dcc1d45e8d3de6091bde", size = 8634437, upload-time = "2026-05-10T19:11:57.286Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/79/8085b7c1ecd48d0535c3c8444a1d8df2926e457dce8e55fabc332a382c9c/moto-5.2.1-py3-none-any.whl", hash = "sha256:19d2fbd6e613aa5b4e364c52cd5d3cea371643a0f4210689a703227bd2924c5c", size = 6671379, upload-time = "2026-05-10T19:11:53.543Z" }, + { url = "https://files.pythonhosted.org/packages/15/79/8085b7c1ecd48d0535c3c8444a1d8df2926e457dce8e55fabc332a382c9c/moto-5.2.1-py3-none-any.whl", hash = "sha256:19d2fbd6e613aa5b4e364c52cd5d3cea371643a0f4210689a703227bd2924c5c", size = 6671379, upload-time = "2026-05-10T19:11:53.543Z" }, ] [[package]] @@ -4767,15 +4809,15 @@ wheels = [ [[package]] name = "responses" version = "0.26.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyyaml" }, { name = "requests" }, { name = "urllib3" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/b4/b7e040379838cc71bf5aabdb26998dfbe5ee73904c92c1c161faf5de8866/responses-0.26.0.tar.gz", hash = "sha256:c7f6923e6343ef3682816ba421c006626777893cb0d5e1434f674b649bac9eb4", size = 81303, upload-time = "2026-02-19T14:38:05.574Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/b4/b7e040379838cc71bf5aabdb26998dfbe5ee73904c92c1c161faf5de8866/responses-0.26.0.tar.gz", hash = "sha256:c7f6923e6343ef3682816ba421c006626777893cb0d5e1434f674b649bac9eb4", size = 81303, upload-time = "2026-02-19T14:38:05.574Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/04/7f73d05b556da048923e31a0cc878f03be7c5425ed1f268082255c75d872/responses-0.26.0-py3-none-any.whl", hash = "sha256:03ec4409088cd5c66b71ecbbbd27fe2c58ddfad801c66203457b3e6a04868c37", size = 35099, upload-time = "2026-02-19T14:38:03.847Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/04/7f73d05b556da048923e31a0cc878f03be7c5425ed1f268082255c75d872/responses-0.26.0-py3-none-any.whl", hash = "sha256:03ec4409088cd5c66b71ecbbbd27fe2c58ddfad801c66203457b3e6a04868c37", size = 35099, upload-time = "2026-02-19T14:38:03.847Z" }, ] [[package]] @@ -5931,6 +5973,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, ] +[[package]] +name = "wcmatch" +version = "10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bracex" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/3e/c0bdc27cf06f4e47680bd5803a07cb3dfd17de84cde92dd217dcb9e05253/wcmatch-10.1.tar.gz", hash = "sha256:f11f94208c8c8484a16f4f48638a85d771d9513f4ab3f37595978801cb9465af", size = 117421, upload-time = "2025-06-22T19:14:02.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl", hash = "sha256:5848ace7dbb0476e5e55ab63c6bbd529745089343427caa5537f230cc01beb8a", size = 39854, upload-time = "2025-06-22T19:14:00.978Z" }, +] + [[package]] name = "websocket-client" version = "1.9.0" @@ -6072,10 +6126,10 @@ wheels = [ [[package]] name = "xmltodict" version = "1.0.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/70/80f3b7c10d2630aa66414bf23d210386700aa390547278c789afa994fd7e/xmltodict-1.0.4.tar.gz", hash = "sha256:6d94c9f834dd9e44514162799d344d815a3a4faec913717a9ecbfa5be1bb8e61", size = 26124, upload-time = "2026-02-22T02:21:22.074Z" } +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/19/70/80f3b7c10d2630aa66414bf23d210386700aa390547278c789afa994fd7e/xmltodict-1.0.4.tar.gz", hash = "sha256:6d94c9f834dd9e44514162799d344d815a3a4faec913717a9ecbfa5be1bb8e61", size = 26124, upload-time = "2026-02-22T02:21:22.074Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/34/98a2f52245f4d47be93b580dae5f9861ef58977d73a79eb47c58f1ad1f3a/xmltodict-1.0.4-py3-none-any.whl", hash = "sha256:a4a00d300b0e1c59fc2bfccb53d7b2e88c32f200df138a0dd2229f842497026a", size = 13580, upload-time = "2026-02-22T02:21:21.039Z" }, + { url = "https://files.pythonhosted.org/packages/38/34/98a2f52245f4d47be93b580dae5f9861ef58977d73a79eb47c58f1ad1f3a/xmltodict-1.0.4-py3-none-any.whl", hash = "sha256:a4a00d300b0e1c59fc2bfccb53d7b2e88c32f200df138a0dd2229f842497026a", size = 13580, upload-time = "2026-02-22T02:21:21.039Z" }, ] [[package]] diff --git a/web/src/app/home/add-extension/page.tsx b/web/src/app/home/add-extension/page.tsx new file mode 100644 index 000000000..4c8445a61 --- /dev/null +++ b/web/src/app/home/add-extension/page.tsx @@ -0,0 +1,1274 @@ +import MarketPage from '@/app/home/plugins/components/plugin-market/PluginMarketComponent'; +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogFooter, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/components/ui/popover'; +import { + Download, + PlusIcon, + ChevronLeft, + ChevronRight, + Server, + Github, + BookOpen, + FileArchive, + Loader2, + CircleHelp, +} from 'lucide-react'; +import { Input } from '@/components/ui/input'; +import { + Tooltip, + TooltipContent, + TooltipTrigger, +} from '@/components/ui/tooltip'; +import React, { useState, useCallback, useEffect, useRef } from 'react'; +import { useNavigate, useSearchParams } from 'react-router-dom'; +import { httpClient, systemInfo } from '@/app/infra/http/HttpClient'; +import { toast } from 
'sonner'; +import { useTranslation } from 'react-i18next'; +import { PluginV4 } from '@/app/infra/entities/plugin'; +import type { Skill } from '@/app/infra/entities/api'; +import { useSidebarData } from '@/app/home/components/home-sidebar/SidebarDataContext'; +import { usePluginInstallTasks } from '@/app/home/plugins/components/plugin-install-task'; +import MCPForm from '@/app/home/mcp/components/mcp-form/MCPForm'; +import type { + MCPFormDraft, + MCPFormHandle, +} from '@/app/home/mcp/components/mcp-form/MCPForm'; +import SkillZipPreviewPanel from '@/app/home/skills/components/SkillZipPreviewPanel'; +import PluginLocalPreviewPanel from '@/app/home/plugins/components/PluginLocalPreviewPanel'; + +type PopoverView = 'menu' | 'mcp' | 'github'; + +enum GithubInstallStatus { + WAIT_INPUT = 'wait_input', + SELECT_RELEASE = 'select_release', + SELECT_ASSET = 'select_asset', + ASK_CONFIRM = 'ask_confirm', + INSTALLING = 'installing', + SKILL_PREVIEW = 'skill_preview', + SKILL_INSTALLING = 'skill_installing', + ERROR = 'error', +} + +interface GithubRelease { + id: number; + tag_name: string; + name: string; + published_at: string; + prerelease: boolean; + draft: boolean; + source_type?: 'release' | 'tag' | 'branch'; + archive_url?: string; +} + +interface GithubAsset { + id: number; + name: string; + size: number; + download_url: string; + content_type: string; +} + +interface GithubSkillMdInfo { + owner: string; + repo: string; + ref: string; + path: string; +} + +function isGithubSkillMdUrl(rawUrl: string): boolean { + try { + const url = new URL(rawUrl.trim()); + return url.pathname.toLowerCase().endsWith('/skill.md'); + } catch { + return rawUrl.trim().toLowerCase().split('?', 1)[0].endsWith('skill.md'); + } +} + +function parseGithubSkillMdUrl(rawUrl: string): GithubSkillMdInfo { + const url = new URL(rawUrl.trim()); + const parts = url.pathname.split('/').filter(Boolean); + + if (url.hostname === 'github.com') { + if (parts.length < 5 || parts[2] !== 'blob') { + 
throw new Error('Invalid GitHub SKILL.md URL'); + } + return { + owner: parts[0], + repo: parts[1], + ref: parts[3], + path: parts.slice(4).join('/'), + }; + } + + if (url.hostname === 'raw.githubusercontent.com') { + if (parts.length < 4) { + throw new Error('Invalid GitHub SKILL.md URL'); + } + return { + owner: parts[0], + repo: parts[1], + ref: parts[2], + path: parts.slice(3).join('/'), + }; + } + + throw new Error('Invalid GitHub SKILL.md URL'); +} + +enum PluginInstallStatus { + ASK_CONFIRM = 'ask_confirm', + INSTALLING = 'installing', + ERROR = 'error', +} + +export default function AddExtensionPage() { + const { t } = useTranslation(); + + if (!systemInfo?.enable_marketplace) { + return ( +
+

{t('plugins.marketplace')}

+
+ ); + } + + return ; +} + +function AddExtensionContent() { + const { t } = useTranslation(); + const navigate = useNavigate(); + const [searchParams, setSearchParams] = useSearchParams(); + const { refreshPlugins, refreshMCPServers, refreshSkills } = useSidebarData(); + const { + addTask, + setSelectedTaskId, + registerOnTaskComplete, + unregisterOnTaskComplete, + clearCompletedTasks, + } = usePluginInstallTasks(); + const [modalOpen, setModalOpen] = useState(false); + const [installInfo, setInstallInfo] = useState>({}); + const [installExtensionType, setInstallExtensionType] = useState< + 'plugin' | 'mcp' | 'skill' + >('plugin'); + const [pluginInstallStatus, setPluginInstallStatus] = + useState(PluginInstallStatus.ASK_CONFIRM); + const [installError, setInstallError] = useState(null); + const [popoverOpen, setPopoverOpen] = useState(false); + const [popoverView, setPopoverView] = useState('menu'); + const [isDragOver, setIsDragOver] = useState(false); + const [skillUploadPreviewOpen, setSkillUploadPreviewOpen] = useState(false); + const [skillUploadPreviewFile, setSkillUploadPreviewFile] = + useState(null); + const [pluginUploadPreviewOpen, setPluginUploadPreviewOpen] = useState(false); + const [pluginUploadPreviewFile, setPluginUploadPreviewFile] = + useState(null); + const fileInputRef = useRef(null); + const mcpFormRef = useRef(null); + const [mcpTesting, setMcpTesting] = useState(false); + const [mcpDraft, setMcpDraft] = useState(); + + // GitHub install state + const [githubURL, setGithubURL] = useState(''); + const [githubReleases, setGithubReleases] = useState([]); + const [selectedRelease, setSelectedRelease] = useState( + null, + ); + const [githubAssets, setGithubAssets] = useState([]); + const [selectedAsset, setSelectedAsset] = useState(null); + const [githubOwner, setGithubOwner] = useState(''); + const [githubRepo, setGithubRepo] = useState(''); + const [fetchingReleases, setFetchingReleases] = useState(false); + const [fetchingAssets, 
setFetchingAssets] = useState(false); + const [fetchingSkillPreview, setFetchingSkillPreview] = useState(false); + const [githubSkillInfo, setGithubSkillInfo] = + useState(null); + const [githubSkillPreview, setGithubSkillPreview] = useState( + null, + ); + const [githubInstallStatus, setGithubInstallStatus] = + useState(GithubInstallStatus.WAIT_INPUT); + const [githubInstallError, setGithubInstallError] = useState( + null, + ); + + useEffect(() => { + // Clear any stale completed tasks on mount + clearCompletedTasks(); + }, [clearCompletedTasks]); + + useEffect(() => { + if (searchParams.get('manual') !== '1') return; + + setPopoverView('menu'); + setPopoverOpen(true); + setSearchParams( + (current) => { + const next = new URLSearchParams(current); + next.delete('manual'); + return next; + }, + { replace: true }, + ); + }, [searchParams, setSearchParams]); + + useEffect(() => { + const onComplete = (_taskId: number, success: boolean) => { + if (success) { + toast.success(t('plugins.installSuccess')); + refreshPlugins(); + } + }; + registerOnTaskComplete(onComplete); + return () => { + unregisterOnTaskComplete(onComplete); + }; + }, [registerOnTaskComplete, unregisterOnTaskComplete, refreshPlugins, t]); + + const handleInstallPlugin = useCallback(async (plugin: PluginV4) => { + setInstallInfo({ + plugin_author: plugin.author, + plugin_name: plugin.name, + plugin_version: plugin.latest_version, + }); + setInstallExtensionType(plugin.type || 'plugin'); + setPluginInstallStatus(PluginInstallStatus.ASK_CONFIRM); + setInstallError(null); + setModalOpen(true); + }, []); + + function handleModalConfirm() { + setPluginInstallStatus(PluginInstallStatus.INSTALLING); + const pluginDisplayName = `${installInfo.plugin_author}/${installInfo.plugin_name}`; + httpClient + .installPluginFromMarketplace( + installInfo.plugin_author, + installInfo.plugin_name, + installInfo.plugin_version, + ) + .then((resp: { task_id: number }) => { + const taskId = resp.task_id; + const taskKey = 
`marketplace-${taskId}`; + addTask({ + taskId, + pluginName: pluginDisplayName, + source: 'marketplace', + extensionType: installExtensionType, + }); + setSelectedTaskId(taskKey); + setModalOpen(false); + }) + .catch((err: { msg?: string }) => { + setInstallError(err.msg || null); + setPluginInstallStatus(PluginInstallStatus.ERROR); + }); + } + + const validateFileType = (file: File): boolean => { + const allowedExtensions = ['.lbpkg', '.zip']; + const fileName = file.name.toLowerCase(); + return allowedExtensions.some((ext) => fileName.endsWith(ext)); + }; + + const getExtensionTypeFromFile = (file: File): 'plugin' | 'skill' => { + const fileName = file.name.toLowerCase(); + if (fileName.endsWith('.lbpkg')) return 'plugin'; + if (fileName.endsWith('.zip')) return 'skill'; + return 'plugin'; + }; + + const uploadFile = useCallback( + async (file: File) => { + if (!validateFileType(file)) { + toast.error(t('addExtension.unsupportedFileType')); + return; + } + + const extType = getExtensionTypeFromFile(file); + + setPopoverOpen(false); + // Clear any selected task to avoid showing stale dialogs + setSelectedTaskId(null); + + if (extType === 'plugin') { + setPluginUploadPreviewFile(file); + setPluginUploadPreviewOpen(true); + } else { + setSkillUploadPreviewFile(file); + setSkillUploadPreviewOpen(true); + } + }, + [t, setSelectedTaskId], + ); + + const handleFileSelect = useCallback(() => { + if (fileInputRef.current) { + fileInputRef.current.click(); + } + }, []); + + const handleFileChange = useCallback( + (event: React.ChangeEvent) => { + const file = event.target.files?.[0]; + if (file) { + uploadFile(file); + } + event.target.value = ''; + }, + [uploadFile], + ); + + const handleDragOver = useCallback((event: React.DragEvent) => { + event.preventDefault(); + setIsDragOver(true); + }, []); + + const handleDragLeave = useCallback((event: React.DragEvent) => { + event.preventDefault(); + setIsDragOver(false); + }, []); + + const handleDrop = useCallback( + (event: 
React.DragEvent) => { + event.preventDefault(); + setIsDragOver(false); + const files = Array.from(event.dataTransfer.files); + if (files.length > 0) { + uploadFile(files[0]); + } + }, + [uploadFile], + ); + + function handleMCPCreated(_serverName: string) { + setMcpDraft(undefined); + refreshMCPServers(); + setPopoverView('menu'); + setPopoverOpen(false); + } + + async function checkExtensionsLimit(): Promise { + const maxExtensions = systemInfo.limitation?.max_extensions ?? -1; + if (maxExtensions < 0) return true; + try { + const [pluginsResp, mcpResp, skillsResp] = await Promise.all([ + httpClient.getPlugins(), + httpClient.getMCPServers(), + httpClient.getSkills(), + ]); + const total = + (pluginsResp.plugins?.length ?? 0) + + (mcpResp.servers?.length ?? 0) + + (skillsResp.skills?.length ?? 0); + if (total >= maxExtensions) { + toast.error( + t('limitation.maxExtensionsReached', { max: maxExtensions }), + ); + return false; + } + } catch { + // If we can't check, let backend handle it + } + return true; + } + + function resetGithubState() { + setGithubURL(''); + setGithubReleases([]); + setSelectedRelease(null); + setGithubAssets([]); + setSelectedAsset(null); + setGithubOwner(''); + setGithubRepo(''); + setFetchingReleases(false); + setFetchingAssets(false); + setFetchingSkillPreview(false); + setGithubSkillInfo(null); + setGithubSkillPreview(null); + setGithubInstallStatus(GithubInstallStatus.WAIT_INPUT); + setGithubInstallError(null); + } + + async function handleGithubAddressSubmit() { + if (isGithubSkillMdUrl(githubURL)) { + await previewGithubSkillMd(); + return; + } + await fetchGithubReleases(); + } + + async function fetchGithubReleases() { + if (!githubURL.trim()) { + toast.error(t('plugins.enterRepoUrl')); + return; + } + + setFetchingReleases(true); + setGithubInstallError(null); + setGithubSkillInfo(null); + setGithubSkillPreview(null); + + try { + const result = await httpClient.getGithubReleases(githubURL); + setGithubReleases(result.releases); 
+ setGithubOwner(result.owner); + setGithubRepo(result.repo); + + if (result.releases.length === 0) { + toast.warning(t('plugins.noReleasesFound')); + } else { + setGithubInstallStatus(GithubInstallStatus.SELECT_RELEASE); + } + } catch (error: unknown) { + const errorMessage = + error instanceof Error ? error.message : String(error); + setGithubInstallError(errorMessage || t('plugins.fetchReleasesError')); + setGithubInstallStatus(GithubInstallStatus.ERROR); + } finally { + setFetchingReleases(false); + } + } + + async function previewGithubSkillMd() { + if (!githubURL.trim()) { + toast.error(t('addExtension.githubUrlRequired')); + return; + } + + setFetchingSkillPreview(true); + setGithubInstallError(null); + setGithubReleases([]); + setGithubAssets([]); + setSelectedRelease(null); + setSelectedAsset(null); + + try { + const skillInfo = parseGithubSkillMdUrl(githubURL); + const result = await httpClient.previewSkillInstallFromGithub( + githubURL.trim(), + skillInfo.owner, + skillInfo.repo, + skillInfo.ref, + ); + const preview = result.skills?.[0]; + if (!preview) { + throw new Error(t('addExtension.noSkillPreviewFound')); + } + setGithubOwner(skillInfo.owner); + setGithubRepo(skillInfo.repo); + setGithubSkillInfo(skillInfo); + setGithubSkillPreview(preview); + setGithubInstallStatus(GithubInstallStatus.SKILL_PREVIEW); + } catch (error: unknown) { + const errorMessage = + error instanceof Error ? 
error.message : String(error); + setGithubInstallError(errorMessage || t('skills.previewLoadError')); + setGithubInstallStatus(GithubInstallStatus.ERROR); + } finally { + setFetchingSkillPreview(false); + } + } + + async function handleReleaseSelect(release: GithubRelease) { + setSelectedRelease(release); + setFetchingAssets(true); + setGithubInstallError(null); + + try { + const result = await httpClient.getGithubReleaseAssets( + githubOwner, + githubRepo, + release.id, + release.tag_name, + release.source_type, + release.archive_url, + ); + setGithubAssets(result.assets); + + if (result.assets.length === 0) { + toast.warning(t('plugins.noAssetsFound')); + } else { + setGithubInstallStatus(GithubInstallStatus.SELECT_ASSET); + } + } catch (error: unknown) { + const errorMessage = + error instanceof Error ? error.message : String(error); + setGithubInstallError(errorMessage || t('plugins.fetchAssetsError')); + setGithubInstallStatus(GithubInstallStatus.ERROR); + } finally { + setFetchingAssets(false); + } + } + + function handleAssetSelect(asset: GithubAsset) { + setSelectedAsset(asset); + setGithubInstallStatus(GithubInstallStatus.ASK_CONFIRM); + } + + async function handleGithubConfirm() { + if (!selectedAsset || !selectedRelease) return; + if (!(await checkExtensionsLimit())) return; + + setGithubInstallStatus(GithubInstallStatus.INSTALLING); + const pluginDisplayName = `${githubOwner}/${githubRepo}`; + httpClient + .installPluginFromGithub( + selectedAsset.download_url, + githubOwner, + githubRepo, + selectedRelease.tag_name, + ) + .then((resp) => { + const taskId = resp.task_id; + const taskKey = `github-${taskId}`; + addTask({ + taskId, + pluginName: pluginDisplayName, + source: 'github', + extensionType: 'plugin', + fileSize: selectedAsset.size, + }); + setSelectedTaskId(taskKey); + resetGithubState(); + setPopoverOpen(false); + }) + .catch((err) => { + setGithubInstallError(err.msg); + setGithubInstallStatus(GithubInstallStatus.ERROR); + }); + } + + async 
function handleGithubSkillConfirm() { + if (!githubSkillInfo) return; + if (!(await checkExtensionsLimit())) return; + + setGithubInstallStatus(GithubInstallStatus.SKILL_INSTALLING); + try { + await httpClient.installSkillFromGithub( + githubURL.trim(), + githubSkillInfo.owner, + githubSkillInfo.repo, + githubSkillInfo.ref, + ); + toast.success(t('skills.installSuccess')); + refreshPlugins(); + refreshSkills(); + resetGithubState(); + setPopoverOpen(false); + } catch (err: unknown) { + const errorMessage = + err instanceof Error + ? err.message + : typeof err === 'object' && err && 'msg' in err + ? String((err as { msg?: string }).msg || '') + : String(err); + setGithubInstallError(errorMessage); + setGithubInstallStatus(GithubInstallStatus.ERROR); + } + } + + function formatFileSize(bytes: number): string { + if (bytes === 0) return '0 Bytes'; + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return Math.round((bytes / Math.pow(k, i)) * 100) / 100 + ' ' + sizes[i]; + } + + function getPopoverWidth(): string { + switch (popoverView) { + case 'mcp': + return 'w-[calc(100vw-2rem)] sm:w-[560px]'; + case 'github': + return 'w-[calc(100vw-2rem)] sm:w-[560px]'; + default: + return 'w-[calc(100vw-2rem)] sm:w-[380px]'; + } + } + + const extensionActions = ( + <> + + + { + setPopoverOpen(open); + }} + > + + + + + {/* ===== Menu View ===== */} + {popoverView === 'menu' && ( +
+ {/* File upload area */} +
+ +

+ {t('addExtension.uploadExtension')} +

+

+ {t('addExtension.uploadHint')} +

+
+ +

+ {t('addExtension.orContinueWith')} +

+ +
+ + + + + +
+
+ )} + + {/* ===== MCP Form View ===== */} + {popoverView === 'mcp' && ( +
+
+ +

+ {t('mcp.createServer')} +

+
+ +
+ {}} + onNewServerCreated={handleMCPCreated} + onDraftChange={setMcpDraft} + onTestingChange={setMcpTesting} + /> +
+ +
+ + +
+
+ )} + + {/* ===== GitHub Install View ===== */} + {popoverView === 'github' && ( +
+
+ +

+ {t('addExtension.installFromGithub')} +

+
+ +
+ {githubInstallStatus === GithubInstallStatus.WAIT_INPUT && ( +
+
+ {t('addExtension.githubUrlHelp')} + + + + + + {t('addExtension.githubUrlTooltip')} + + +
+ setGithubURL(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') handleGithubAddressSubmit(); + }} + /> + +
+ )} + + {githubInstallStatus === GithubInstallStatus.SELECT_RELEASE && ( +
+
+

+ {t('plugins.selectRelease')} +

+ +
+
+ {githubReleases.map((release) => ( +
handleReleaseSelect(release)} + > +
+
+ {release.name || release.tag_name} +
+
+ {release.tag_name} •{' '} + {new Date( + release.published_at, + ).toLocaleDateString()} +
+
+ {release.prerelease && ( + + Pre + + )} +
+ ))} +
+ {fetchingAssets && ( +

+ + {t('plugins.loading')} +

+ )} +
+ )} + + {githubInstallStatus === GithubInstallStatus.SELECT_ASSET && ( +
+
+

+ {t('plugins.selectAsset')} +

+ +
+ {selectedRelease && ( +
+ + {selectedRelease.name || selectedRelease.tag_name} + +
+ )} +
+ {githubAssets.map((asset) => ( +
handleAssetSelect(asset)} + > + {asset.name} + + {formatFileSize(asset.size)} + +
+ ))} +
+
+ )} + + {githubInstallStatus === GithubInstallStatus.ASK_CONFIRM && ( +
+
+

+ {t('plugins.confirmInstall')} +

+ +
+ {selectedRelease && selectedAsset && ( +
+
+ Repository: + + {githubOwner}/{githubRepo} + +
+
+ Release: + {selectedRelease.tag_name} +
+
+ File: + {selectedAsset.name} +
+
+ )} + +
+ )} + + {githubInstallStatus === GithubInstallStatus.SKILL_PREVIEW && ( +
+
+

+ {t('addExtension.previewSkill')} +

+ +
+ + {githubSkillPreview && ( +
+
+ + + +
+
+ {githubSkillPreview.display_name || + githubSkillPreview.name} +
+
+ {githubSkillPreview.name} +
+
+
+ {githubSkillPreview.description && ( +

+ {githubSkillPreview.description} +

+ )} +
+
+ + Repository:{' '} + + {githubSkillInfo?.owner}/{githubSkillInfo?.repo} +
+
+ + File:{' '} + + + {githubSkillInfo?.path} + +
+ {githubSkillPreview.package_root && ( +
+ + Directory:{' '} + + + {githubSkillPreview.package_root} + +
+ )} +
+
+ )} + + +
+ )} + + {githubInstallStatus === GithubInstallStatus.INSTALLING && ( +
+ + {t('plugins.installing')} +
+ )} + + {githubInstallStatus === + GithubInstallStatus.SKILL_INSTALLING && ( +
+ + {t('skills.installing')} +
+ )} + + {githubInstallStatus === GithubInstallStatus.ERROR && ( +
+

+ {t('plugins.installFailed')} +

+ {githubInstallError && ( +

+ {githubInstallError} +

+ )} + +
+ )} +
+
+ )} +
+
+ + ); + + return ( + <> +
+
+ +
+
+ + { + setModalOpen(open); + if (!open) { + setInstallError(null); + } + }} + > + + + + + {t('plugins.installPlugin')} + + + + {pluginInstallStatus === PluginInstallStatus.ASK_CONFIRM && ( +
+

+ {installInfo.plugin_version + ? t('plugins.askConfirm', { + name: installInfo.plugin_name, + version: installInfo.plugin_version, + }) + : t('plugins.askConfirmNoVersion', { + name: installInfo.plugin_name, + })} +

+
+ )} + + {pluginInstallStatus === PluginInstallStatus.INSTALLING && ( +
+

{t('plugins.installing')}

+
+ )} + + {pluginInstallStatus === PluginInstallStatus.ERROR && ( +
+

{t('plugins.installFailed')}

+

{installError}

+
+ )} + + + {pluginInstallStatus === PluginInstallStatus.ASK_CONFIRM && ( + <> + + + + )} + {pluginInstallStatus === PluginInstallStatus.ERROR && ( + + )} + +
+
+ + {/* Plugin Upload Preview Dialog */} + { + setPluginUploadPreviewOpen(open); + if (!open) { + setPluginUploadPreviewFile(null); + } + }} + > + + + + + {t('plugins.localPreview.title')} + + + {pluginUploadPreviewFile && ( + { + setPluginUploadPreviewOpen(false); + setPluginUploadPreviewFile(null); + }} + onInstallStarted={() => { + setPluginUploadPreviewOpen(false); + setPluginUploadPreviewFile(null); + }} + /> + )} + + + + {/* Skill Upload Preview Dialog */} + { + setSkillUploadPreviewOpen(open); + if (!open) { + setSkillUploadPreviewFile(null); + } + }} + > + + + + + {t('skills.uploadZip')} + + + {skillUploadPreviewFile && ( + { + setSkillUploadPreviewOpen(false); + setSkillUploadPreviewFile(null); + }} + onImported={(skillNames) => { + setSkillUploadPreviewOpen(false); + setSkillUploadPreviewFile(null); + void refreshSkills(); + const firstSkillName = skillNames[0]; + if (firstSkillName) { + navigate( + `/home/skills?id=${encodeURIComponent(firstSkillName)}`, + ); + } + }} + /> + )} + + + + ); +} diff --git a/web/src/app/home/bots/components/bot-card/BotCard.tsx b/web/src/app/home/bots/components/bot-card/BotCard.tsx index 3551ed667..c5a1cba8d 100644 --- a/web/src/app/home/bots/components/bot-card/BotCard.tsx +++ b/web/src/app/home/bots/components/bot-card/BotCard.tsx @@ -4,6 +4,7 @@ import { httpClient } from '@/app/infra/http/HttpClient'; import { Switch } from '@/components/ui/switch'; import { useTranslation } from 'react-i18next'; import { toast } from 'sonner'; +import { MessageSquare, Workflow } from 'lucide-react'; export default function BotCard({ botCardVO, @@ -42,28 +43,14 @@ export default function BotCard({
- - - + {botCardVO.adapterLabel}
- - - + {botCardVO.usePipelineName} diff --git a/web/src/app/home/components/BoxUnavailableNotice.tsx b/web/src/app/home/components/BoxUnavailableNotice.tsx new file mode 100644 index 000000000..5fe54a80a --- /dev/null +++ b/web/src/app/home/components/BoxUnavailableNotice.tsx @@ -0,0 +1,53 @@ +import { useTranslation } from 'react-i18next'; +import { Info, ShieldAlert } from 'lucide-react'; + +import { Alert, AlertDescription } from '@/components/ui/alert'; + +/** + * Banner shown when a feature depends on the Box sandbox runtime but it is + * currently disabled in config or otherwise unavailable. Pass the ``hint`` + * key returned by ``useBoxStatus`` (``'boxDisabled' | 'boxUnavailable'``). + * + * Renders nothing when there is no hint — safe to drop at the top of any + * page that may or may not need to surface the notice. + */ +export interface BoxUnavailableNoticeProps { + hint: 'boxDisabled' | 'boxUnavailable' | null; + /** Specific failure reason from the backend (``connector_error``). Shown + * on a dedicated line so the user sees WHY (e.g. ``Configured sandbox + * backend "nsjail" is unavailable``) instead of just the generic + * "unavailable" wording. Ignored when ``hint === 'boxDisabled'`` + * because the disabled-by-config message already carries the reason. */ + reason?: string | null; + className?: string; +} + +export function BoxUnavailableNotice({ + hint, + reason, + className, +}: BoxUnavailableNoticeProps) { + const { t } = useTranslation(); + if (!hint) return null; + + const variant = hint === 'boxDisabled' ? 'default' : 'destructive'; + const Icon = hint === 'boxDisabled' ? Info : ShieldAlert; + const showReason = hint === 'boxUnavailable' && reason; + + return ( + + + +
{t(`monitoring.${hint}`)}
+ {showReason && ( +
{reason}
+ )} +
+ {t('monitoring.boxRequiredHint')} +
+
+
+ ); +} + +export default BoxUnavailableNotice; diff --git a/web/src/app/home/components/account-settings-dialog/AccountSettingsDialog.tsx b/web/src/app/home/components/account-settings-dialog/AccountSettingsDialog.tsx index 87b438eb3..b658c9fab 100644 --- a/web/src/app/home/components/account-settings-dialog/AccountSettingsDialog.tsx +++ b/web/src/app/home/components/account-settings-dialog/AccountSettingsDialog.tsx @@ -20,7 +20,7 @@ import { } from '@/components/ui/item'; import { httpClient } from '@/app/infra/http/HttpClient'; import { systemInfo } from '@/app/infra/http'; -import { Loader2, ExternalLink, KeyRound } from 'lucide-react'; +import { Loader2, ExternalLink, KeyRound, Layers } from 'lucide-react'; import PasswordChangeDialog from '../password-change-dialog/PasswordChangeDialog'; interface AccountSettingsDialogProps { @@ -136,34 +136,7 @@ export default function AccountSettingsDialog({ {/* Space Account Item */} - - - - - + {t('account.spaceStatus')} diff --git a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx index ffea18d6e..078db6f79 100644 --- a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx +++ b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx @@ -20,8 +20,14 @@ import { useTranslation } from 'react-i18next'; import { cn } from '@/lib/utils'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; -import { Copy, Check, Globe, QrCode } from 'lucide-react'; +import { Copy, Check, Globe, Info, QrCode } from 'lucide-react'; import { copyToClipboard } from '@/app/utils/clipboard'; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/components/ui/tooltip'; import { systemInfo } from '@/app/infra/http'; /** @@ -123,13 +129,13 @@ function WebhookUrlField({ }; return ( - - {label} -
+ + {label} +
(e.target as HTMLInputElement).select()} />
{extraUrl && ( -
+
(e.target as HTMLInputElement).select()} />