From 00dd6e5a1feb952af88a474abffd07f5015ceee6 Mon Sep 17 00:00:00 2001
From: zhaizhiqiang <584508161@qq.com>
Date: Fri, 26 Jun 2026 03:32:33 +0000
Subject: [PATCH 1/3] support mini-swe-agent and claude-code blackbox agent
 training recipes

---
 .../claude_code/Dockerfile.claude-code-tool   |  21 +
 .../claude_code/claude_code_runner.py         | 232 +++++++++
 .../claude_code/config/claude_code.yaml       |   1 +
 .../Dockerfile.mini-swe-agent-tool            |  45 ++
 .../blackbox_recipes/mini_swe_agent/README.md | 269 +++++++++++
 .../mini_swe_agent/__init__.py                |   0
 .../mini_swe_agent/config/agent_config.yaml   |  36 ++
 .../config/agent_config_openyuanrong.yaml     |  37 ++
 .../mini_swe_agent/config/parallel_infer.yaml |  31 ++
 .../config/swe_agent_blackbox.yaml            | 123 +++++
 .../swe_agent_blackbox_megatron_async.yaml    | 162 +++++++
 .../swe_agent_blackbox_megatron_sync.yaml     | 129 +++++
 .../mini_swe_agent/dataset.py                 |  34 ++
 .../mini_swe_agent/framework.py               | 105 ++++
 .../mini_swe_agent/mini_swe_agent_runner.py   | 227 +++++++++
 .../mini_swe_agent/parallel_infer.py          | 447 ++++++++++++++++++
 .../blackbox_recipes/mini_swe_agent/reward.py |  74 +++
 .../mini_swe_agent/run_agent.py               | 106 +++++
 .../mini_swe_agent/subprocess_runner.py       |  61 +++
 examples/blackbox_recipes/sandbox/sandbox.py  |  10 +
 .../blackbox_recipes/scripts/build_tool.sh    |  75 +++
 .../blackbox_recipes/scripts/run_infer.sh     |  66 +++
 .../blackbox_recipes/scripts/run_train.sh     | 122 +++++
 .../scripts/run_train_megatron_async.sh       | 199 ++++++++
 .../scripts/run_train_megatron_sync.sh        | 138 ++++++
 25 files changed, 2750 insertions(+)
 create mode 100644 examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
 create mode 100644 examples/blackbox_recipes/claude_code/claude_code_runner.py
 create mode 100644 examples/blackbox_recipes/claude_code/config/claude_code.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/Dockerfile.mini-swe-agent-tool
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/README.md
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/__init__.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/dataset.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/framework.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/reward.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/run_agent.py
 create mode 100644 examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
 create mode 100644 examples/blackbox_recipes/sandbox/sandbox.py
 create mode 100755 examples/blackbox_recipes/scripts/build_tool.sh
 create mode 100755 examples/blackbox_recipes/scripts/run_infer.sh
 create mode 100755 examples/blackbox_recipes/scripts/run_train.sh
 create mode 100755 examples/blackbox_recipes/scripts/run_train_megatron_async.sh
 create mode 100755 examples/blackbox_recipes/scripts/run_train_megatron_sync.sh

diff --git a/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool b/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
new file mode 100644
index 00000000..3d12af4c
--- /dev/null
+++ b/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
@@ -0,0 +1,21 @@
+# Claude Code sidecar tool image.
+#
+# Mounted at /opt/claude-code inside the SWE-bench sandbox.
+
+FROM node:20-bookworm-slim AS builder
+
+ARG TOOL_VERSION="latest"
+ARG NPM_REGISTRY=""
+
+ENV DISABLE_AUTOUPDATER=1 \
+    IS_SANDBOX=1 \
+    npm_config_audit=false \
+    npm_config_fund=false \
+    npm_config_update_notifier=false
+
+RUN if [ -n "${NPM_REGISTRY}" ]; then npm config set registry "${NPM_REGISTRY}"; fi \
+    && npm install -g --prefix /opt/claude-code "@anthropic-ai/claude-code@${TOOL_VERSION}" \
+    && /opt/claude-code/bin/claude --version
+
+FROM scratch
+COPY --from=builder /opt/claude-code /
diff --git a/examples/blackbox_recipes/claude_code/claude_code_runner.py b/examples/blackbox_recipes/claude_code/claude_code_runner.py
new file mode 100644
index 00000000..bee41aaf
--- /dev/null
+++ b/examples/blackbox_recipes/claude_code/claude_code_runner.py
@@ -0,0 +1,232 @@
+"""Claude Code runner for the blackbox SWE-agent recipe."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shlex
+import time
+
+from uni_agent.trainer.framework.types import SessionHandle, SessionRuntime
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TOOL_IMAGE = "claude-code-tool:latest"
+TOOL_TARGET = "/opt/claude-code"
+
+
+def extract_task(raw_prompt) -> str:
+    if isinstance(raw_prompt, str):
+        return raw_prompt
+    return next(
+        (m["content"] for m in raw_prompt if isinstance(m, dict) and m.get("role") == "user"),
+        str(raw_prompt),
+    )
+
+
+def _extract_issue_text(task: str) -> str:
+    start = task.find("<issue_description>")
+    end = task.find("</issue_description>")
+    if start >= 0 and end > start:
+        return task[start + len("<issue_description>"):end].strip()
+    marker = "\nFollow these steps to resolve the issue:"
+    if marker in task:
+        return task.split(marker, 1)[0].strip()
+    return task.strip()
+
+
+def _decode_metadata_list(value) -> list[str]:
+    if not value:
+        return []
+    if isinstance(value, list):
+        return [str(item) for item in value]
+    if isinstance(value, str):
+        try:
+            parsed = json.loads(value)
+        except json.JSONDecodeError:
+            return [value]
+        if isinstance(parsed, list):
+            return [str(item) for item in parsed]
+    return [str(value)]
+
+
+def build_claude_task(raw_prompt, tools_kwargs: dict | None = None) -> str:
+    tools_kwargs = tools_kwargs or {}
+    task = extract_task(raw_prompt)
+    metadata = ((tools_kwargs.get("reward") or {}).get("metadata") or {})
+    issue = metadata.get("problem_statement") or _extract_issue_text(task)
+    tests = _decode_metadata_list(metadata.get("FAIL_TO_PASS"))
+    if not tests:
+        tests = _decode_metadata_list(metadata.get("PASS_TO_PASS"))[:3]
+    tests_block = "\n".join(f"- {test}" for test in tests) if tests else "- Run the closest relevant tests you identify."
+
+    return (
+        "You are fixing a SWE-bench task in /testbed.\n\n"
+        "Issue:\n"
+        f"{issue}\n\n"
+        "Rules:\n"
+        "- Edit source files only. Do not modify tests.\n"
+        "- The development environment is already installed; do not install packages unless a test command proves it is necessary.\n"
+        "- There is no submit tool in this environment. Do not try to submit.\n"
+        "- Do not create extra edge-case test files after the relevant tests pass.\n"
+        "- Do not run `pytest --collect-only`, `git log`, or any other command that does not directly validate the fix.\n"
+        "- Do not analyze unrelated `is_separable` behavior.\n"
+        "- Do not run additional ad-hoc verification after the listed relevant pytest command passes.\n"
+        "- Do not commit.\n"
+        "- After the minimal fix is applied and a relevant pytest command passes, print a one-line summary and exit immediately.\n\n"
+        "Relevant tests to run after the fix:\n"
+        f"{tests_block}\n"
+    )
+
+
+def build_claude_command(
+    *,
+    task: str,
+    base_url: str,
+    max_turns: int,
+    model: str = "default",
+    permission_mode: str = "bypassPermissions",
+    conda_env: str | None = "testbed",
+    disable_web_tools: bool = True,
+    disable_slash_commands: bool = True,
+) -> str:
+    env = {
+        "ANTHROPIC_BASE_URL": base_url,
+        "ANTHROPIC_API_KEY": "not-needed",
+        "ANTHROPIC_MODEL": model,
+        "ANTHROPIC_DEFAULT_HAIKU_MODEL": model,
+        "ANTHROPIC_DEFAULT_SONNET_MODEL": model,
+        "ANTHROPIC_DEFAULT_OPUS_MODEL": model,
+        "ANTHROPIC_SMALL_FAST_MODEL": model,
+        "CLAUDE_CODE_DISABLE_BACKGROUND_TASKS": "1",
+        "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
+        "CLAUDE_CODE_FORK_SUBAGENT": "0",
+        "CLAUDE_CODE_SUBAGENT_MODEL": model,
+        "DISABLE_AUTOUPDATER": "1",
+        "IS_SANDBOX": "1",
+    }
+    env_assignments = [f"{key}={shlex.quote(value)}" for key, value in env.items()]
+    if conda_env:
+        conda_prefix = f"/opt/miniconda3/envs/{conda_env}"
+        env_assignments.extend(
+            [
+                f"CONDA_DEFAULT_ENV={shlex.quote(conda_env)}",
+                f"CONDA_PREFIX={shlex.quote(conda_prefix)}",
+                f"PATH={shlex.quote(conda_prefix + '/bin')}:/opt/miniconda3/bin:$PATH",
+            ]
+        )
+    env_prefix = " ".join(env_assignments)
+    argv = [
+        "/opt/claude-code/bin/claude",
+        "-p",
+        task,
+        "--model",
+        model,
+        "--max-turns",
+        str(max_turns),
+        "--permission-mode",
+        permission_mode,
+    ]
+    if disable_slash_commands:
+        argv.append("--disable-slash-commands")
+    if disable_web_tools:
+        argv.extend(["--disallowedTools", "Agent", "Task", "WebFetch", "WebSearch"])
+    return (
+        "unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy NO_PROXY no_proxy; "
+        "cd /testbed; "
+        f"{env_prefix} "
+        + shlex.join(argv)
+    )
+
+
+async def _create_claude_sandbox(
+    *,
+    image: str,
+    sidecar_image: str,
+    gateway_url: str,
+):
+    from examples.swe_agent_blackbox.sandbox import YRSandbox, extract_upstream
+
+    upstream = extract_upstream(gateway_url) if gateway_url else ""
+    return await YRSandbox.create(
+        image=image,
+        sidecar_image=sidecar_image,
+        sidecar_target=TOOL_TARGET,
+        upstream=upstream,
+    )
+
+
+async def claude_code_runner(
+    *,
+    raw_prompt,
+    session: SessionHandle,
+    sample_index: int,
+    session_runtime: SessionRuntime,
+    tools_kwargs: dict | None = None,
+    tool_image: str = DEFAULT_TOOL_IMAGE,
+    run_timeout: int = 7200,
+    **kwargs,
+) -> None:
+    from examples.swe_agent_blackbox.dataset import extract_image
+    from examples.swe_agent_blackbox.mini_swe_agent_runner import SandboxEnvForReward
+    from examples.swe_agent_blackbox.reward import build_reward_context, evaluate_in_env
+
+    tools_kwargs = tools_kwargs or {}
+    task = build_claude_task(raw_prompt, tools_kwargs)
+    env_config = tools_kwargs.get("env", {})
+    image = extract_image(env_config)
+    if not image:
+        raise ValueError(f"No Docker image found in tools_kwargs.env for sample {sample_index}")
+
+    gateway_url = session.base_url
+    if not gateway_url:
+        raise ValueError(f"gateway_url is empty for sample {sample_index}")
+
+    sandbox = await _create_claude_sandbox(
+        image=image,
+        sidecar_image=tool_image,
+        gateway_url=gateway_url,
+    )
+
+    try:
+        post_setup_cmd = env_config.get("post_setup_cmd", "")
+        if post_setup_cmd:
+            setup_result = await sandbox.run(post_setup_cmd, timeout=120)
+            if setup_result.exit_code != 0:
+                logger.warning("post_setup_cmd failed rc=%s: %.300s", setup_result.exit_code, setup_result.stdout + setup_result.stderr)
+
+        from examples.swe_agent_blackbox.sandbox import rewrite_gateway_url
+
+        claude_base_url = rewrite_gateway_url(gateway_url, strip_v1=True)
+        max_turns = int(os.environ.get("SWE_AGENT_MAX_TURNS", "100"))
+        agent_cmd = build_claude_command(
+            task=task,
+            base_url=claude_base_url,
+            max_turns=max_turns,
+        )
+
+        started_at = time.perf_counter()
+        result = await sandbox.run(agent_cmd, timeout=int(run_timeout))
+        elapsed = time.perf_counter() - started_at
+        logger.info("[sample %d] claude-code finished rc=%s elapsed=%.1fs", sample_index, result.exit_code, elapsed)
+        if result.exit_code != 0:
+            logger.warning(
+                "[sample %d] claude-code failed stdout_tail=%r stderr_tail=%r",
+                sample_index,
+                (result.stdout or "")[-4000:],
+                (result.stderr or "")[-4000:],
+            )
+
+        metadata, eval_timeout = build_reward_context(tools_kwargs)
+        score, eval_result = await evaluate_in_env(SandboxEnvForReward(sandbox), metadata, eval_timeout)
+        logger.info("[sample %d] reward done score=%s resolved=%s", sample_index, score, eval_result.get("resolved"))
+
+        reward_info = {
+            "reward_score": score,
+            "claude_code_exit_code": result.exit_code,
+            **eval_result,
+        }
+        await session_runtime.complete_session(session.session_id, reward_info=reward_info)
+    finally:
+        await sandbox.cleanup()
diff --git a/examples/blackbox_recipes/claude_code/config/claude_code.yaml b/examples/blackbox_recipes/claude_code/config/claude_code.yaml
new file mode 100644
index 00000000..503fa1da
--- /dev/null
+++ b/examples/blackbox_recipes/claude_code/config/claude_code.yaml
@@ -0,0 +1 @@
+#TODO
\ No newline at end of file
diff --git a/examples/blackbox_recipes/mini_swe_agent/Dockerfile.mini-swe-agent-tool b/examples/blackbox_recipes/mini_swe_agent/Dockerfile.mini-swe-agent-tool
new file mode 100644
index 00000000..a2fba565
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/Dockerfile.mini-swe-agent-tool
@@ -0,0 +1,45 @@
+# Mini-swe-agent sidecar tool image.
+#
+# Contains a self-contained Python venv at /opt/mini-swe-agent with
+# mini-swe-agent + litellm installed.  When mounted into a sandbox at
+# /opt/mini-swe-agent, the agent can be invoked via:
+#
+#   /opt/mini-swe-agent/bin/python /opt/mini-swe-agent/bin/run_agent.py ...
+#
+# Uses python-build-standalone for maximum portability across different
+# glibc versions (built against older glibc, forward-compatible).
+#
+# Build:
+#   docker build -f Dockerfile.mini-swe-agent-tool -t mini-swe-agent-tool:latest .
+#
+
+FROM debian:bullseye-slim AS builder
+
+ARG PBS_RELEASE="20260602"
+ARG PBS_PYTHON="3.12.13"
+ARG PIP_INDEX_URL=""
+
+# Download and extract python-build-standalone (stripped, 32MB)
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates wget \
+    && rm -rf /var/lib/apt/lists/* \
+    && wget -q \
+        "https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_RELEASE}/cpython-${PBS_PYTHON}%2B${PBS_RELEASE}-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" \
+        -O /tmp/python.tar.gz \
+    && mkdir -p /opt/mini-swe-agent \
+    && tar -xzf /tmp/python.tar.gz -C /opt/mini-swe-agent --strip-components=1 \
+    && rm /tmp/python.tar.gz
+
+# Install mini-swe-agent + litellm
+RUN /opt/mini-swe-agent/bin/pip install --no-cache-dir \
+    ${PIP_INDEX_URL:+-i ${PIP_INDEX_URL}} \
+    "mini-swe-agent==2.2.8" \
+    "litellm==1.81.7"
+
+# Copy the in-sandbox runner script
+COPY run_agent.py /opt/mini-swe-agent/bin/run_agent.py
+
+# Final scratch image: files are at the image root level so that when
+# akernel_sdk.Mount(target="/opt/mini-swe-agent") overlays this image,
+# the files appear at /opt/mini-swe-agent/bin/python etc.
+FROM scratch
+COPY --from=builder /opt/mini-swe-agent /
diff --git a/examples/blackbox_recipes/mini_swe_agent/README.md b/examples/blackbox_recipes/mini_swe_agent/README.md
new file mode 100644
index 00000000..b32a637a
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/README.md
@@ -0,0 +1,269 @@
+# Mini-SWE-Agent In-Sandbox Execution
+
+## Overview
+
+`mini_swe` and `claude_code` both run inside the SWE-bench sandbox through a
+sidecar tool image. The external runner creates the sandbox, mounts the selected
+tool image, starts the agent process, and evaluates the reward in the same
+sandbox.
+
+For `mini_swe`, the agent executes commands through `LocalEnvironment` (local
+bash) inside the sandbox and calls the LLM through the gateway URL passed in via
+stdin. For `claude_code`, the runner starts the Claude Code CLI from the sidecar
+image and points it at the same Anthropic-compatible gateway.
+
+The `mini_swe` tool image uses
+[python-build-standalone](https://github.com/astral-sh/python-build-standalone)
+to build an isolated Python environment. The Claude Code tool image uses a Node
+builder to install the Claude Code npm package. Both images use a minimal
+`FROM scratch` final stage, so the sandbox base image does not need to provide
+Python, Node, or npm for the sidecar tool runtime.
+
+**Supported runners:**
+
+| runner | Description |
+|--------|-------------|
+| `uniagent` | Original SWE-agent runner |
+| `mini_swe` | mini-swe-agent sidecar runner |
+| `claude_code` | Claude Code sidecar runner; reward is returned through `complete_session(reward_info)` without writing a separate reward JSON file |
+
+**Supported sandbox types:**
+
+| Type | Description |
+|------|-------------|
+| OpenYuanRong (`"openyuanrong"`) | Uses `akernel_sdk.Mount` and `sandbox.commands.run()` |
+
+At runtime, the selected runner depends directly on its tool image. The tool
+image does not need to be extracted into a host directory ahead of time.
+
+## Architecture
+
+```text
+[Rollouter Host: mini_swe_agent_runner / claude_code_runner]
+  |
+  |-- _create_sandbox(image, sidecar_image)
+  |     `-- openyuanrong: Sandbox(mounts=[Mount(target="/opt/<tool>", ...)])
+  |
+  |-- sandbox.run("<tool entrypoint>")
+  |     `-- [Inside Sandbox]
+  |           /opt/mini-swe-agent/bin/python3.12 or /opt/claude-code/bin/claude
+  |           stdin <- task config JSON (task, gateway_url, agent)
+  |           commands run inside the SWE-bench sandbox
+  |           stdout -> runner-specific execution result
+  |
+  |-- parse agent result
+  |-- SandboxEnvForReward(sandbox) -> evaluate_in_env()
+  `-- session_runtime.complete_session(reward_info)
+```
+
+## Prerequisites
+
+1. **OpenYuanRong** - set `OPENYUANRONG_SERVER_ADDRESS` and `OPENYUANRONG_TOKEN`.
+2. **Runner tool image** - build the selected tool image and push it to a remote
+   registry if the sandbox service cannot access local Docker images.
+
+## 1. Build Tool Image
+
+`mini_swe` and `claude_code` are both injected into the SWE-bench sandbox as
+sidecar tool images, but they differ in image contents, mount paths, and
+accelerator/mirror options. Use `build_tool.sh` for both runners, and select the
+target runner with `--tool` or `TOOL_KIND`.
+
+| runner | Default tool image | Dockerfile | Sandbox mount path | Image contents | Mirror option |
+|--------|--------------------|------------|--------------------|----------------|---------------|
+| `mini_swe` | `mini-swe-agent-tool:latest` | `Dockerfile.mini-swe-agent-tool` | `/opt/mini-swe-agent` | Standalone Python 3.12, `mini-swe-agent`, `litellm`, and `run_agent.py` | `--pip-index` / `PIP_INDEX_URL` |
+| `claude_code` | `claude-code-tool:latest` | `Dockerfile.claude-code-tool` | `/opt/claude-code` | Claude Code npm package installed by a Node 20 builder | `--npm-registry` / `NPM_REGISTRY` |
+
+### mini_swe Tool Image
+
+`mini_swe` is the default build target:
+
+```bash
+# Use the default PyPI source.
+bash examples/blackbox_recipes/scripts/build_tool.sh
+
+# Use a custom PyPI mirror.
+bash examples/blackbox_recipes/scripts/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
+
+# Build and push to a remote registry.
+bash examples/blackbox_recipes/scripts/build_tool.sh --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
+```
+
+The `mini_swe` image uses `python-build-standalone` to build an isolated Python
+runtime. The final `FROM scratch` image contains only the files needed under
+`/opt/mini-swe-agent`, and it does not depend on the Python version installed in
+the sandbox base image.
+
+After pushing the image, point runtime inference at it with `SWE_AGENT_TOOL_IMAGE`:
+
+```bash
+SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
+RUNNER=mini_swe \
+bash examples/blackbox_recipes/scripts/run_infer.sh
+```
+
+### Claude Code Tool Image
+
+Claude Code must be selected explicitly with `--tool claude_code`:
+
+```bash
+# Use the default npm registry.
+bash examples/blackbox_recipes/scripts/build_tool.sh --tool claude_code
+
+# Use a custom npm registry.
+bash examples/blackbox_recipes/scripts/build_tool.sh \
+    --tool claude_code \
+    --npm-registry https://registry.npmmirror.com
+
+# Select the Claude Code npm package version.
+bash examples/blackbox_recipes/scripts/build_tool.sh \
+    --tool claude_code \
+    --tool-version latest
+
+# Build and push the Claude Code sidecar image.
+bash examples/blackbox_recipes/scripts/build_tool.sh \
+    --tool claude_code \
+    --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
+```
+
+The Claude Code image uses `node:20-bookworm-slim` as the builder stage and
+installs `@anthropic-ai/claude-code` into `/opt/claude-code`. The final image is
+also a `FROM scratch` sidecar image. At runtime, the runner mounts it into the
+sandbox at `/opt/claude-code` and invokes `/opt/claude-code/bin/claude`.
+
+After pushing the image, point runtime inference at it with `SWE_AGENT_TOOL_IMAGE`:
+
+```bash
+SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/claude-code-tool:latest \
+RUNNER=claude_code \
+bash examples/blackbox_recipes/scripts/run_infer.sh
+```
+
+### Combined Build Options
+
+`--tool`, image tags, mirrors, and registries can be combined:
+
+```bash
+bash examples/blackbox_recipes/scripts/build_tool.sh \
+    --tool mini_swe \
+    --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/ \
+    --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
+```
+
+The build script:
+
+1. Selects the Dockerfile and default image name from `--tool`:
+   - `mini_swe` -> `mini-swe-agent-tool:latest`
+   - `claude_code` -> `claude-code-tool:latest`
+2. Tags and pushes the image when `--registry` is provided.
+
+Both tool images are sidecar runtime dependencies, not SWE-bench task base
+images. The `mini_swe` Python runtime is fully isolated from the sandbox
+container's Python. The `claude_code` Node/npm dependencies live only under
+`/opt/claude-code`, so the sandbox base image does not need Node installed.
+
+### Build Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `TOOL_IMAGE` | `mini-swe-agent-tool` / `claude-code-tool` | Image name; the default changes with `TOOL_KIND` |
+| `TOOL_TAG` | `latest` | Image tag |
+| `TOOL_VERSION` | `latest` | Tool package version; for `claude_code`, this selects the `@anthropic-ai/claude-code` npm package version |
+| `PIP_INDEX_URL` | unset, use PyPI | pip index URL; equivalent to `--pip-index` |
+| `TOOL_KIND` | `mini_swe` | Tool kind: `mini_swe` or `claude_code` |
+| `NPM_REGISTRY` | unset, use npm default | npm registry URL; equivalent to `--npm-registry` |
+
+## 2. Inference With OpenYuanRong Sandbox
+
+### Using run_infer.sh
+
+```bash
+cd "$(git rev-parse --show-toplevel)"
+
+RUNNER=mini_swe \
+SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
+MODEL_PATH=$HOME/models/Qwen3.5-9B \
+DATA_PATH=$HOME/data/swe_agent/r2e_gym.parquet \
+MAX_SAMPLES=1 \
+TP=1 \
+bash examples/blackbox_recipes/scripts/run_infer.sh
+```
+
+### Calling Python Directly
+
+```bash
+python examples/blackbox_recipes/mini_swe_agent/parallel_infer.py \
+    --model-path ~/models/Qwen3.5-9B \
+    --data-path ~/data/swe_agent/r2e_gym.parquet \
+    --max-samples 1 \
+    --runner mini_swe \
+    --max-turns 100 \
+    --tensor-parallel-size 1
+```
+
+## 3. Inference
+
+### Environment Variables
+
+```bash
+export OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888"
+export OPENYUANRONG_TOKEN="<your-token>"
+export DEPLOYMENT=openyuanrong
+```
+
+### Run mini_swe
+
+```bash
+RUNNER=mini_swe \
+OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
+OPENYUANRONG_TOKEN="<token>" \
+DEPLOYMENT=openyuanrong \
+SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
+bash examples/blackbox_recipes/scripts/run_infer.sh
+```
+
+### Run Claude Code
+
+```bash
+RUNNER=claude_code \
+OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
+OPENYUANRONG_TOKEN="<token>" \
+DEPLOYMENT=openyuanrong \
+SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/claude-code-tool:latest \
+SWE_AGENT_MAX_TURNS=50 \
+SWE_AGENT_RUN_TIMEOUT=7200 \
+bash examples/blackbox_recipes/scripts/run_infer.sh
+```
+
+## 4. Training (Fully Async)
+
+```bash
+OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
+OPENYUANRONG_TOKEN="<token>" \
+MODEL_PATH=~/models/Qwen3.5-9B \
+bash examples/blackbox_recipes/scripts/run_train_megatron_async.sh
+```
+
+The training YAML keeps `mini_swe` as the default runner:
+
+```yaml
+agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
+```
+
+To run training with Claude Code, keep the YAML unchanged and override the runner
+FQN from the launch command:
+
+```bash
+python3 -m verl.experimental.fully_async_policy.fully_async_main \
+  --config-path examples/swe_agent_blackbox/config \
+  --config-name swe_agent_blackbox_megatron_async \
+  actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=examples.swe_agent_blackbox.claude_code_runner.claude_code_runner
+```
+
+## 5. Configuration
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SWE_AGENT_MAX_TURNS` | `100` | Max agent steps |
+| `SWE_AGENT_TOOL_IMAGE` | `swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest` | Sidecar tool image |
+| `DEBUG_MODE` | (unset) | Set to 1 to enable debug logging |
diff --git a/examples/blackbox_recipes/mini_swe_agent/__init__.py b/examples/blackbox_recipes/mini_swe_agent/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml b/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
new file mode 100644
index 00000000..b7352b72
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
@@ -0,0 +1,36 @@
+- name: swe_agent
+
+  _target_: uni_agent.agent_loop.UniAgentLoop
+  concurrency: 64
+  log_dir: /tmp/swebench_qwen3_coder
+  mask_abnormal_exit_traj: false
+
+  interaction:
+    action_timeout: 300
+    max_turns: 100
+
+  env:
+    deployment:
+      type: local
+      command: /usr/bin/python3 -m swerex.server --auth-token {token}
+      timeout: 600
+      startup_timeout: 600
+      container_runtime: docker
+    env_variables:
+      PIP_PROGRESS_BAR: "off"
+      PIP_CACHE_DIR: "~/.cache/pip"
+      PAGER: "cat"
+      MANPAGER: "cat"
+      LESS: "-R"
+      TQDM_DISABLE: "1"
+      GIT_PAGER: "cat"
+
+  tool_parser: qwen3_coder
+
+  tools:
+    - name: str_replace_editor
+    - name: execute_bash
+    - name: submit
+
+  reward:
+    eval_timeout: 600
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml b/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
new file mode 100644
index 00000000..b298c676
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
@@ -0,0 +1,37 @@
+- name: swe_agent
+
+  _target_: uni_agent.agent_loop.UniAgentLoop
+  concurrency: 64
+  log_dir: /tmp/swebench_qwen3_coder
+  mask_abnormal_exit_traj: false
+
+  interaction:
+    action_timeout: 300
+    max_turns: 100
+
+  env:
+    deployment:
+      type: openyuanrong
+      command: /opt/swe-rex/bin/python /opt/swe-rex/bin/swerex-remote --host 0.0.0.0 --port {port} --auth-token {token}
+      timeout: 600
+      startup_timeout: 600
+      swerex_runtime_image: swr.cn-east-3.myhuaweicloud.com/openyuanrong/swerex-runtime:1.4.0
+      swerex_runtime_target: /opt/swe-rex
+    env_variables:
+      PIP_PROGRESS_BAR: "off"
+      PIP_CACHE_DIR: "~/.cache/pip"
+      PAGER: "cat"
+      MANPAGER: "cat"
+      LESS: "-R"
+      TQDM_DISABLE: "1"
+      GIT_PAGER: "cat"
+
+  tool_parser: qwen3_coder
+
+  tools:
+    - name: str_replace_editor
+    - name: execute_bash
+    - name: submit
+
+  reward:
+    eval_timeout: 600
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml b/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
new file mode 100644
index 00000000..0829fdcd
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
@@ -0,0 +1,31 @@
+# Parallel inference config for the blackbox SWE-agent recipe.
+# Composes verl's base configs with inference-specific overrides.
+
+defaults:
+  - model_engine: dp
+  - actor@actor_rollout_ref.actor: ${model_engine}_actor
+  - rollout@actor_rollout_ref.rollout: rollout
+  - model@actor_rollout_ref.model: hf_model
+  - reward: reward
+  - _self_
+
+hydra:
+  searchpath:
+    - pkg://verl.trainer.config
+
+actor_rollout_ref:
+  hybrid_engine: true
+  nccl_timeout: 600
+  model: {}
+  rollout:
+    agent: {}
+
+trainer:
+  nnodes: 1
+  n_gpus_per_node: 8
+  logger:
+    - console
+  device: cuda
+  total_epochs: 1
+  total_training_steps: null
+  balance_batch: false
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
new file mode 100644
index 00000000..62b73da1
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
@@ -0,0 +1,123 @@
+# PPO trainer config for the blackbox SWE-agent recipe (v2).
+# Uses the generic AgentFrameworkRolloutAdapter + SWEAgentFramework subclass.
+
+hydra:
+  searchpath:
+    - pkg://verl.trainer.config
+
+defaults:
+  - ppo_trainer
+  - _self_
+
+actor_rollout_ref:
+  hybrid_engine: true
+  nccl_timeout: 600
+
+  model:
+    path: ???
+    enable_gradient_checkpointing: true
+
+  rollout:
+    name: vllm
+    mode: async
+    prompt_length: 4096
+    response_length: 131072
+    max_model_len: 135168
+    temperature: 1.0
+    top_p: 1.0
+    n: 8
+    tensor_model_parallel_size: 4
+    gpu_memory_utilization: 0.7
+    calculate_log_probs: true
+    enable_sleep_mode: true
+    free_cache_engine: true
+
+    multi_turn:
+      enable: true
+      max_assistant_turns: 1
+      max_parallel_calls: 1
+      format: qwen3_coder
+
+    agent:
+      num_workers: 8
+      agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
+
+    custom:
+      agent_framework:
+        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
+        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
+        gateway_count: 1
+        completion_timeout_seconds: 600
+        max_concurrent_sessions: 32
+        agent_runner_kwargs:
+          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
+
+  actor:
+    use_dynamic_bsz: true
+    ppo_mini_batch_size: 16
+    use_kl_loss: false
+    kl_loss_coef: 0.0
+    clip_ratio_low: 0.2
+    clip_ratio_high: 0.28
+    loss_agg_mode: token-mean
+    optim:
+      lr: 1e-6
+      weight_decay: 0.1
+      clip_grad: 1.0
+    fsdp_config:
+      param_offload: true
+      optimizer_offload: true
+      grad_offload: true
+
+data:
+  train_files: ???
+  val_files: ???
+  max_prompt_length: 4096
+  max_response_length: 131072
+  train_batch_size: 128
+  val_batch_size: 128
+  return_raw_chat: true
+  trust_remote_code: true
+  custom_cls:
+    path: pkg://examples.swe_agent_blackbox.dataset
+    name: SWEBenchDataset
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: grpo
+  use_kl_in_reward: false
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.0
+
+reward:
+  custom_reward_function:
+    path: pkg://examples.swe_agent_blackbox.reward
+    name: compute_score
+
+trainer:
+  use_legacy_worker_impl: disable
+  nnodes: 1
+  n_gpus_per_node: 8
+  total_epochs: 10
+  project_name: swe_agent_blackbox
+  experiment_name: swe_agent
+  logger:
+    - console
+  device: cuda
+  balance_batch: false
+  val_before_train: true
+  val_only: false
+  save_freq: 10
+  test_freq: 10
+  default_local_dir: checkpoints/swe_agent_blackbox
+  resume_mode: disable
+
+ray_kwargs:
+  ray_init:
+    runtime_env:
+      env_vars:
+        TRANSFER_QUEUE_ENABLE: ""
+        NCCL_P2P_DISABLE: "1"
+        NCCL_SHM_DISABLE: "1"
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml
new file mode 100644
index 00000000..d25fcce5
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml
@@ -0,0 +1,162 @@
+# Megatron + TQ fully-async training config for the blackbox SWE-agent recipe.
+# Uses FullyAsyncAgentFrameworkRolloutAdapter + SWEAgentFramework with Megatron backend.
+#
+# Entry point: python3 -m verl.experimental.fully_async_policy.fully_async_main
+# Requires: transfer_queue.enable=true (selects TQ path in FullyAsyncTaskRunner)
+
+hydra:
+  searchpath:
+    - pkg://verl.trainer.config
+
+defaults:
+  - ppo_megatron_trainer
+  - _self_
+
+actor_rollout_ref:
+  hybrid_engine: false
+  nccl_timeout: 9600
+
+  model:
+    path: ???
+
+  rollout:
+    name: vllm
+    mode: async
+    prompt_length: 4096
+    response_length: 131072
+    max_model_len: 135168
+    temperature: 1.0
+    top_p: 1.0
+    n: 8
+    tensor_model_parallel_size: 2
+    gpu_memory_utilization: 0.7
+    calculate_log_probs: true
+    enable_sleep_mode: true
+    free_cache_engine: true
+    enable_chunked_prefill: true
+    max_num_batched_tokens: 135168
+    checkpoint_engine:
+      backend: nccl
+
+    multi_turn:
+      enable: true
+      max_assistant_turns: 1
+      max_parallel_calls: 1
+      format: qwen3_coder
+
+    agent:
+      num_workers: 8
+      agent_loop_manager_class: uni_agent.trainer.framework.entry.FullyAsyncAgentFrameworkRolloutAdapter
+
+    custom:
+      agent_framework:
+        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
+        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
+        gateway_count: 1
+        completion_timeout_seconds: 600
+        max_concurrent_sessions: 32
+        agent_runner_kwargs:
+          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
+
+  actor:
+    use_dynamic_bsz: true
+    use_rollout_log_probs: true
+    ppo_mini_batch_size: 16
+    ppo_micro_batch_size_per_gpu: 1
+    use_kl_loss: false
+    kl_loss_coef: 0.0
+    clip_ratio_low: 0.2
+    clip_ratio_high: 0.28
+    clip_ratio_c: 10.0
+    loss_agg_mode: token-mean
+    entropy_coeff: 0
+    optim:
+      lr: 1e-6
+      weight_decay: 0.1
+      lr_decay_style: constant
+    megatron:
+      param_offload: true
+      grad_offload: true
+      optimizer_offload: true
+      tensor_model_parallel_size: 8
+      pipeline_model_parallel_size: 1
+      context_parallel_size: 1
+      use_mbridge: true
+      use_remove_padding: false
+
+  ref:
+    megatron:
+      param_offload: false
+      tensor_model_parallel_size: 8
+      pipeline_model_parallel_size: 1
+      context_parallel_size: 1
+
+data:
+  train_files: ???
+  val_files: ???
+  prompt_key: prompt
+  truncation: left
+  max_prompt_length: 4096
+  max_response_length: 131072
+  train_batch_size: 0
+  gen_batch_size: 1
+  return_raw_chat: true
+  trust_remote_code: true
+  custom_cls:
+    path: pkg://examples.swe_agent_blackbox.dataset
+    name: SWEBenchDataset
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: grpo
+  use_kl_in_reward: false
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.0
+  rollout_correction:
+    bypass_mode: true
+
+reward:
+  custom_reward_function:
+    path: pkg://examples.swe_agent_blackbox.reward
+    name: compute_score
+
+trainer:
+  nnodes: 1
+  n_gpus_per_node: 8
+  total_epochs: 10
+  project_name: swe_agent_blackbox
+  experiment_name: swe_agent
+  logger:
+    - console
+  device: cuda
+  val_before_train: true
+  val_only: false
+  save_freq: 10
+  test_freq: 10
+  default_local_dir: checkpoints/swe_agent_blackbox
+  resume_mode: auto
+
+rollout:
+  nnodes: 1
+  n_gpus_per_node: 8
+  total_rollout_steps: 100000
+
+async_training:
+  use_trainer_do_validate: false
+  staleness_threshold: 1.0
+  trigger_parameter_sync_step: 4
+  require_batches: 1
+  partial_rollout: true
+
+transfer_queue:
+  enable: true
+
+ray_kwargs:
+  ray_init:
+    runtime_env:
+      env_vars:
+        TRANSFER_QUEUE_ENABLE: ""
+        NCCL_P2P_DISABLE: "1"
+        NCCL_SHM_DISABLE: "1"
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
new file mode 100644
index 00000000..65b09b1a
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
@@ -0,0 +1,129 @@
+# Megatron sync training config for the blackbox SWE-agent recipe.
+# Uses main_ppo_sync + Megatron backend, same blackbox infrastructure as FSDP.
+#
+# Entry point: python3 -m verl.trainer.main_ppo_sync
+
+hydra:
+  searchpath:
+    - pkg://verl.trainer.config
+
+defaults:
+  - ppo_megatron_trainer
+  - _self_
+
+actor_rollout_ref:
+  hybrid_engine: true
+  nccl_timeout: 600
+
+  model:
+    path: ???
+    enable_gradient_checkpointing: true
+
+  rollout:
+    name: vllm
+    mode: async
+    prompt_length: 4096
+    response_length: 131072
+    max_model_len: 135168
+    temperature: 1.0
+    top_p: 1.0
+    n: 8
+    tensor_model_parallel_size: 4
+    gpu_memory_utilization: 0.7
+    calculate_log_probs: true
+    enable_sleep_mode: true
+    free_cache_engine: true
+
+    multi_turn:
+      enable: true
+      max_assistant_turns: 1
+      max_parallel_calls: 1
+      format: qwen3_coder
+
+    agent:
+      num_workers: 8
+      agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
+
+    custom:
+      agent_framework:
+        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
+        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
+        gateway_count: 1
+        completion_timeout_seconds: 600
+        max_concurrent_sessions: 32
+        agent_runner_kwargs:
+          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
+
+  actor:
+    use_dynamic_bsz: true
+    ppo_mini_batch_size: 16
+    use_kl_loss: false
+    kl_loss_coef: 0.0
+    clip_ratio_low: 0.2
+    clip_ratio_high: 0.28
+    loss_agg_mode: token-mean
+    optim:
+      lr: 1e-6
+      weight_decay: 0.1
+      clip_grad: 1.0
+    megatron:
+      param_offload: true
+      grad_offload: true
+      optimizer_offload: true
+      tensor_model_parallel_size: 8
+      pipeline_model_parallel_size: 1
+      context_parallel_size: 1
+      use_mbridge: true
+
+data:
+  train_files: ???
+  val_files: ???
+  max_prompt_length: 4096
+  max_response_length: 131072
+  train_batch_size: 128
+  val_batch_size: 128
+  return_raw_chat: true
+  trust_remote_code: true
+  custom_cls:
+    path: pkg://examples.swe_agent_blackbox.dataset
+    name: SWEBenchDataset
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: grpo
+  use_kl_in_reward: false
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.0
+
+reward:
+  custom_reward_function:
+    path: pkg://examples.swe_agent_blackbox.reward
+    name: compute_score
+
+trainer:
+  use_legacy_worker_impl: disable
+  nnodes: 1
+  n_gpus_per_node: 8
+  total_epochs: 10
+  project_name: swe_agent_blackbox
+  experiment_name: swe_agent
+  logger:
+    - console
+  device: cuda
+  balance_batch: false
+  val_before_train: true
+  val_only: false
+  save_freq: 10
+  test_freq: 10
+  default_local_dir: checkpoints/swe_agent_blackbox
+  resume_mode: disable
+
+ray_kwargs:
+  ray_init:
+    runtime_env:
+      env_vars:
+        TRANSFER_QUEUE_ENABLE: ""
+        NCCL_P2P_DISABLE: "1"
+        NCCL_SHM_DISABLE: "1"
diff --git a/examples/blackbox_recipes/mini_swe_agent/dataset.py b/examples/blackbox_recipes/mini_swe_agent/dataset.py
new file mode 100644
index 00000000..89d65129
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/dataset.py
@@ -0,0 +1,34 @@
+"""SWEBench-specific dataset that injects verl-standard reward fields."""
+
+from verl.utils.dataset.rl_dataset import RLHFDataset
+
+
+def extract_image(env_config: dict) -> str:
+    """Return the Docker image named by ``env_config``, or ``""`` when none is set.
+
+    Two layouts are recognised, checked in this order:
+
+    * flat:   ``env_config["image"]``
+    * nested: ``env_config["deployment"]["image"]`` (``deployment`` must be a dict)
+
+    A falsy value (missing key, ``None``, empty string) counts as "not set".
+    """
+    flat_image = env_config.get("image")
+    if flat_image:
+        return flat_image
+    nested = env_config.get("deployment")
+    return (nested.get("image") or "") if isinstance(nested, dict) else ""
+
+
+class SWEBenchDataset(RLHFDataset):
+    """RLHFDataset whose rows always carry ``data_source`` and ``reward_model``."""
+
+    def __getitem__(self, item):
+        row_dict = super().__getitem__(item)
+        # ``or {}`` instead of a .get default: a key that is present but None must not crash.
+        extra_info = row_dict.get("extra_info") or {}
+        tools_kwargs = extra_info.get("tools_kwargs") or {}
+        reward_config = tools_kwargs.get("reward") or {}
+        row_dict.setdefault("data_source", reward_config.get("name", "unknown"))
+        row_dict.setdefault("reward_model", {"ground_truth": {}})
+        return row_dict
diff --git a/examples/blackbox_recipes/mini_swe_agent/framework.py b/examples/blackbox_recipes/mini_swe_agent/framework.py
new file mode 100644
index 00000000..7c5c027c
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/framework.py
@@ -0,0 +1,105 @@
+"""SWE-agent specific framework subclass.
+
+Injects reward_info (from agent_runner's complete_session call)
+into sample_fields["extra_info"] so the reward worker's
+compute_score can access it via extra_info.
+
+Overrides _run_session to execute agent_runner in a separate Ray worker
+process, preventing blocking operations from stalling the event loop.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import functools
+import logging
+from dataclasses import replace
+from uuid import uuid4
+
+import ray
+
+from uni_agent.trainer.framework.framework import OpenAICompatibleAgentFramework
+
+from examples.swe_agent_blackbox.subprocess_runner import remote_agent_run
+
+logger = logging.getLogger(__name__)
+
+
+class SWEAgentFramework(OpenAICompatibleAgentFramework):
+    """Framework that runs the agent runner as a Ray task and feeds its reward_info to scoring."""
+
+    async def _score_trajectories(self, session_trajectories, sample_fields):
+        """Merge the last trajectory's reward_info into extra_info, then delegate to the parent."""
+        if session_trajectories and session_trajectories[-1].reward_info:
+            reward_info = session_trajectories[-1].reward_info
+            extra_info = dict(sample_fields.get("extra_info") or {})
+            sample_fields = {**sample_fields, "extra_info": {**extra_info, **reward_info}}
+        return await super()._score_trajectories(session_trajectories, sample_fields)
+
+    def _resolve_runner(self) -> tuple[str, dict]:
+        """Extract FQN and pre-bound kwargs from self.agent_runner.
+
+        self.agent_runner may be a functools.partial (from_config wraps it),
+        so we unpack the original function and its keywords.
+        """
+        fn = self.agent_runner
+        kwargs = {}
+        if isinstance(fn, functools.partial):
+            kwargs = dict(fn.keywords)
+            fn = fn.func
+        fqn = f"{fn.__module__}.{fn.__qualname__}"
+        return fqn, kwargs
+
+    async def _run_session(
+        self,
+        *,
+        prompts,
+        raw_prompt,
+        sample_index: int,
+        session_id: str | None = None,
+        runner_kwargs: dict | None = None,
+    ):
+        """Run agent_runner in a Ray worker process instead of in-process.
+
+        Returns ``(trajectories, sample_fields)``. Trajectories are scored only when reward
+        workers are configured and the session produced any. On failure the session is
+        aborted and the exception re-raised.
+        """
+        session_id = session_id or f"session-{sample_index}-0-{uuid4().hex}"
+        sample_fields = self._extract_sample_fields(prompts=prompts, sample_index=sample_index)
+        # Resolve the runner before opening the session: if this raises, nothing is left to abort.
+        agent_runner_fqn, resolved_kwargs = self._resolve_runner()
+        session = await self.session_runtime.create_session(session_id)
+
+        try:
+            if runner_kwargs:
+                resolved_kwargs = {**resolved_kwargs, **runner_kwargs}  # per-call kwargs win
+            ref = remote_agent_run.remote(
+                agent_runner_fqn=agent_runner_fqn,
+                raw_prompt=raw_prompt,
+                session_id=session_id,
+                base_url=session.base_url,
+                sample_index=sample_index,
+                runner_kwargs=resolved_kwargs,
+            )
+            # ray.get blocks, so it is awaited on an executor thread to keep the event loop free.
+            loop = asyncio.get_running_loop()
+            reward_info = await loop.run_in_executor(None, ray.get, ref)
+            await self.session_runtime.complete_session(session_id, reward_info=reward_info)
+            session_trajectories = await self.session_runtime.finalize_session(session_id)
+        except Exception as e:
+            logger.error("_run_session failed: session=%s, sample=%d, runner=%s: %s",
+                         session_id, sample_index, agent_runner_fqn, e, exc_info=True)
+            await self.session_runtime.abort_session(session_id)
+            raise
+
+        if not self.reward_loop_worker_handles or not session_trajectories:
+            return session_trajectories, sample_fields
+
+        annotations = await self._score_trajectories(session_trajectories, sample_fields)
+        scored_trajectories = []
+        for traj, (score, extra) in zip(session_trajectories, annotations, strict=True):
+            scored_trajectories.append(
+                replace(traj, reward_score=score, extra_fields={**traj.extra_fields, "reward_extra_info": extra})
+            )
+        return scored_trajectories, sample_fields
diff --git a/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py b/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
new file mode 100644
index 00000000..33882bc8
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
@@ -0,0 +1,227 @@
+"""Mini-swe-agent runner for the blackbox SWE-agent recipe.
+
+Agent runs inside an OpenYuanRong remote sandbox via sidecar tool image mount.
+The runner creates the sandbox, pipes task config via stdin, parses
+the result from stdout, and evaluates reward in the same sandbox.
+"""
+
+from __future__ import annotations
+
+import base64
+import json
+import logging
+import os
+import shlex
+import time
+from pathlib import Path
+
+from uni_agent.trainer.framework.types import SessionHandle, SessionRuntime
+
+from examples.swe_agent_blackbox.dataset import extract_image
+from examples.swe_agent_blackbox.reward import build_reward_context, evaluate_in_env
+from examples.swe_agent_blackbox.sandbox import CommandResult, YRSandbox, extract_upstream, rewrite_gateway_url
+
+logger = logging.getLogger(__name__)
+if os.environ.get("DEBUG_MODE"):
+    logger.setLevel(logging.DEBUG)
+
+DEFAULT_TOOL_IMAGE = "swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest"
+
+
+class SandboxEnvForReward:
+    """Async env facade over :class:`YRSandbox` for reward specs: ``communicate``,
+    ``write_file`` and ``read_file`` each run one shell command in the sandbox.
+    """
+
+    def __init__(self, sandbox):
+        self._sandbox = sandbox
+
+    async def communicate(self, input: str, timeout=600, check="ignore", error_msg="Command failed") -> str:
+        outcome = await self._sandbox.run(input, timeout=int(timeout))
+        if check == "raise" and outcome.exit_code != 0:  # any other ``check`` value ignores failures
+            raise RuntimeError(f"{error_msg}: {outcome.stdout[:200]}")
+        return outcome.stdout
+
+    async def write_file(self, path: str | Path, content: str) -> None:
+        command = f"echo {base64.b64encode(content.encode()).decode()} | base64 -d > {path}"
+        await self.communicate(command, check="raise", error_msg=f"write {path}")
+
+    async def read_file(self, path: str | Path, **_) -> str:
+        return await self.communicate(f"cat {path}")
+
+
+def _extract_task(raw_prompt) -> str:
+    """Return raw_prompt itself if it is a str, else the first user message's content."""
+    if isinstance(raw_prompt, str):
+        return raw_prompt
+    for message in raw_prompt:
+        if isinstance(message, dict) and message.get("role") == "user":
+            return message["content"]
+    return str(raw_prompt)  # no user message: fall back to the prompt's string form
+
+
+def _build_task_config(
+    *,
+    task: str,
+    gateway_url: str,
+) -> dict:
+    """Build the task config passed to run_agent.py via stdin.
+
+    The gateway URL goes through ``rewrite_gateway_url`` first; the agent step limit
+    comes from the ``SWE_AGENT_MAX_TURNS`` environment variable (default ``"100"``).
+    """
+    config = {"task": task, "gateway_url": rewrite_gateway_url(gateway_url)}
+    max_turns = os.environ.get("SWE_AGENT_MAX_TURNS", "100")
+    # int() raises ValueError if the variable is set to a non-integer.
+    config["agent"] = {"step_limit": int(max_turns)}
+    return config
+
+
+def build_agent_command(
+    *,
+    config_b64: str,
+    conda_env: str = "testbed",
+) -> str:
+    """Build the shell command that runs run_agent.py with the conda env active.
+
+    The env assignments prefix the python stage, not ``echo``: a prefix assignment only
+    reaches the one pipeline stage it is written on, so the agent would not see it otherwise.
+    """
+    conda_prefix = f"/opt/miniconda3/envs/{conda_env}"
+    env_prefix = (
+        f"CONDA_DEFAULT_ENV={shlex.quote(conda_env)} CONDA_PREFIX={shlex.quote(conda_prefix)} "
+        f"PATH={shlex.quote(conda_prefix + '/bin')}:/opt/miniconda3/bin:$PATH"
+    )
+    unset_proxies = "unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy NO_PROXY no_proxy; "
+    agent = "/opt/mini-swe-agent/bin/python /opt/mini-swe-agent/bin/run_agent.py"
+    return f"{unset_proxies}echo {config_b64} | base64 -d | {env_prefix} {agent}"
+
+
+async def mini_swe_agent_runner(
+    *,
+    raw_prompt,
+    session: SessionHandle,
+    sample_index: int,
+    session_runtime: SessionRuntime,
+    tools_kwargs: dict | None = None,
+    tool_image: str = DEFAULT_TOOL_IMAGE,
+    run_timeout: int = 7200,
+    conda_env: str = "testbed",
+    **kwargs,
+) -> None:
+    """Run mini-swe-agent inside a sandbox with sidecar tool mount.
+
+    Flow:
+        1. Create OpenYuanRong remote sandbox with mini-swe-agent sidecar
+        2. Pipe task config to run_agent.py via stdin
+        3. Parse agent result from stdout
+        4. Evaluate reward in the same sandbox
+        5. Complete session with reward_info
+
+    Args:
+        raw_prompt: Task as a string or a message list (first user message is used).
+        session: Session handle; supplies ``base_url`` (gateway) and ``session_id``.
+        sample_index: Sample index, used in log and error messages.
+        session_runtime: Runtime on which ``complete_session`` is called.
+        tools_kwargs: Per-sample config; ``env`` holds the image and ``post_setup_cmd``.
+        tool_image: Sidecar image passed to ``YRSandbox.create``.
+        run_timeout: Timeout handed to ``sandbox.run`` for the agent command.
+        conda_env: Conda environment name passed to ``build_agent_command``.
+        **kwargs: Accepted and ignored.
+
+    Raises:
+        ValueError: If no sandbox image or no gateway URL is available.
+    """
+    tools_kwargs = tools_kwargs or {}
+    logger.info("mini_swe_agent_runner called, sample_index=%d", sample_index)
+
+    # Extract task text and sandbox config (image from parquet)
+    task = _extract_task(raw_prompt)
+    logger.info("task extracted, %d chars", len(task))
+    env_config = tools_kwargs.get("env", {})
+    image = extract_image(env_config)
+    if not image:
+        raise ValueError(f"No sandbox image found in tools_kwargs.env for sample {sample_index}")
+
+    # Gateway URL — extract upstream for OpenYuanRong tunnel
+    gateway_url = session.base_url
+    if not gateway_url:
+        raise ValueError(f"gateway_url is empty for sample {sample_index}")
+    upstream = extract_upstream(gateway_url)
+    sandbox = await YRSandbox.create(image=image, sidecar_image=tool_image, upstream=upstream)
+    sandbox_id = sandbox.sandbox_id
+    logger.info("Sandbox created (image=%s, sandbox_id=%s)", image, sandbox_id)
+
+    try:
+        # Built inside the try so that a failure here (e.g. a non-integer
+        # SWE_AGENT_MAX_TURNS) still reaches the sandbox cleanup in ``finally``.
+        task_config = _build_task_config(task=task, gateway_url=gateway_url)
+
+        # Run post_setup_cmd if provided (e.g. git checkout correct commit)
+        post_setup_cmd = env_config.get("post_setup_cmd", "")
+        if post_setup_cmd:
+            logger.info("Running post_setup_cmd (%d chars)...", len(post_setup_cmd))
+            r = await sandbox.run(post_setup_cmd, timeout=600)
+            if r.exit_code != 0:
+                logger.warning("post_setup_cmd failed (rc=%d): %s", r.exit_code, r.stdout[:200])
+            else:
+                logger.info("post_setup_cmd done")
+
+        # Run agent inside sandbox — pipe config via base64-encoded stdin.
+        config_b64 = base64.b64encode(json.dumps(task_config).encode()).decode()
+        agent_cmd = build_agent_command(config_b64=config_b64, conda_env=conda_env)
+        logger.debug("[sample %d] starting agent inside sandbox", sample_index)
+        t0 = time.perf_counter()
+        agent_result = await sandbox.run(agent_cmd, timeout=int(run_timeout))
+        elapsed = time.perf_counter() - t0
+        logger.debug("[sample %d] agent process finished: rc=%d (%.1fs)",
+                     sample_index, agent_result.exit_code, elapsed)
+
+        # Parse agent result from stdout; "submission" may be missing or None, hence ``or ""``.
+        agent_info = _parse_agent_result(agent_result.stdout, sample_index)
+        logger.info("[sample %d] agent: exit_status=%s, submission=%d chars",
+                    sample_index, agent_info.get("exit_status"), len(agent_info.get("submission") or ""))
+
+        # Evaluate reward in the same sandbox
+        metadata, eval_timeout = build_reward_context(tools_kwargs)
+        t0 = time.perf_counter()
+        score, eval_result = await evaluate_in_env(SandboxEnvForReward(sandbox), metadata, eval_timeout)
+        logger.debug("[sample %d] reward done: score=%s, resolved=%s (%.1fs)",
+                     sample_index, score, eval_result.get("resolved"), time.perf_counter() - t0)
+
+        reward_info = {"reward_score": score, **eval_result}
+        await session_runtime.complete_session(session.session_id, reward_info=reward_info)
+    except Exception as e:
+        logger.warning("Mini-swe-agent runner failed for sample %d (sandbox_id=%s): %s", sample_index, sandbox_id, e)
+        raise
+    finally:
+        try:
+            await sandbox.cleanup()
+        except Exception as cleanup_err:
+            # Best-effort: report the failure but never let it replace the run's own outcome.
+            logger.warning("Sandbox cleanup failed (sandbox_id=%s): %s", sandbox_id, cleanup_err)
+
+
+def _parse_agent_result(stdout: str, sample_index: int) -> dict:
+    """Parse the result JSON object that run_agent.py prints to stdout.
+
+    litellm may print error messages to stdout, polluting the output, so the
+    last line that parses as a JSON object wins; the whole stdout is tried as
+    a fallback. Anything else yields an "error" result with an empty submission.
+    """
+    error_result = {"exit_status": "error", "submission": ""}
+    stdout = stdout.strip()
+    if not stdout:
+        return error_result
+    # Candidates: non-blank lines starting with "{", newest first, then the full text.
+    candidates = [line.strip() for line in reversed(stdout.split("\n")) if line.strip().startswith("{")]
+    for candidate in [*candidates, stdout]:
+        try:
+            parsed = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+        # Callers use .get(); valid JSON that is not an object (list, number, ...) is rejected.
+        if isinstance(parsed, dict):
+            return parsed
+    logger.warning("[sample %d] Failed to parse agent result (full stdout): %s", sample_index, stdout[:1000])
+    return error_result
diff --git a/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py b/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
new file mode 100644
index 00000000..c74765e0
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
@@ -0,0 +1,447 @@
+"""Parallel inference runner for the blackbox SWE-agent recipe (v2).
+
+Creates an LLM server, GatewayServingRuntime, and SWEAgentFramework,
+then runs agent sessions in parallel and reports resolve rate.
+
+Usage (CLI):
+    python examples/swe_agent_blackbox/parallel_infer.py \
+        --model-path ~/models/Qwen3-Coder-30B-A3B-Instruct \
+        --data-path ~/data/swe_agent/swe_bench_verified.parquet \
+        --max-samples 10
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import logging
+import os
+from functools import partial
+from typing import Any
+from uuid import uuid4
+
+import numpy as np
+import ray
+
+from verl import DataProto
+from verl.protocol import pad_dataproto_to_divisor
+from verl.utils import hf_tokenizer
+from verl.utils.transferqueue_utils import tq as _tq_mock
+from verl.workers.rollout.llm_server import LLMServerManager
+
+from uni_agent.trainer.gateway.runtime import GatewayServingRuntime
+
+from examples.swe_agent_blackbox.framework import SWEAgentFramework
+from examples.swe_agent_blackbox.agent_runner import swe_agent_runner
+from examples.swe_agent_blackbox.claude_code_runner import claude_code_runner
+
+try:
+    from examples.swe_agent_blackbox.mini_swe_agent_runner import mini_swe_agent_runner
+except ImportError:
+    mini_swe_agent_runner = None
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    level=os.getenv("VERL_LOGGING_LEVEL", "INFO"),
+    force=True,
+)
+logger = logging.getLogger(__name__)
+
+
+# =====================================================================
+# Dataset loading (inlined from dataset.py — only used here)
+# =====================================================================
+
+
+def _remap_image_to_local(image_name: str) -> str:
+    """Map an image reference to ``<name>:latest``.
+
+    If the first ``/``-separated component contains a dot, only the last component is kept;
+    ``_1776_`` becomes ``__`` and anything after the last ``:`` is dropped.
+    """
+    head, _, _ = image_name.partition("/")
+    has_registry = "/" in image_name and "." in head
+    name = (image_name.rsplit("/", 1)[-1] if has_registry else image_name).replace("_1776_", "__")
+    return f"{name.rsplit(':', 1)[0]}:latest"
+
+
+def _remap_sample_images(sample: dict[str, Any]) -> dict[str, Any]:
+    """Rewrite ``extra_info.tools_kwargs.env.image`` in place to its local name; return ``sample``.
+
+    Samples without extra_info or without an env image are returned untouched.
+    """
+    info = sample.get("extra_info")
+    env_cfg = info.get("tools_kwargs", {}).get("env", {}) if info else {}
+    original = env_cfg.get("image")
+    if original:
+        remapped = _remap_image_to_local(original)
+        if remapped != original:
+            logger.debug("Remapping image: %s -> %s", original, remapped)
+            env_cfg["image"] = remapped
+    return sample
+
+
+def _inject_reward_fields(sample: dict[str, Any]) -> None:
+    """Default ``data_source`` / ``reward_model`` in place from extra_info.tools_kwargs.reward."""
+    # ``or {}`` rather than a .get default: a key that is present but None must not crash.
+    extra_info = sample.get("extra_info") or {}
+    reward_config = (extra_info.get("tools_kwargs") or {}).get("reward") or {}
+    sample.setdefault("data_source", reward_config.get("name", "unknown"))
+    sample.setdefault("reward_model", {"ground_truth": {}})
+
+
+def load_swe_dataset(data_path: str | list[str], max_samples: int = -1) -> list[dict[str, Any]]:
+    """Load SWE samples from one parquet file or several, as a list of row dicts.
+
+    ``~`` in paths is expanded. Every row gets its env image remapped and default
+    reward fields injected; a positive ``max_samples`` then keeps only the first rows.
+    """
+    import pyarrow.parquet as pq
+
+    logger.info("Loading dataset from: %s", data_path)
+    if isinstance(data_path, list):
+        import pyarrow as pa
+        table = pa.concat_tables([pq.read_table(os.path.expanduser(p)) for p in data_path])
+    else:
+        table = pq.read_table(os.path.expanduser(data_path))
+
+    samples = []
+    for row in table.to_pylist():
+        row = _remap_sample_images(row)
+        _inject_reward_fields(row)
+        samples.append(row)
+
+    if max_samples > 0:
+        samples = samples[:max_samples]
+        logger.info("Using first %d samples (max_samples=%d)", len(samples), max_samples)
+
+    logger.info("Loaded %d samples from %s", len(samples), data_path)
+    return samples
+
+
+class _MockReplayBuffer:
+    """Stand-in replay buffer for inference runs: ``add`` accepts items and discards them."""
+
+    def add(self, partition_id, items):
+        return None
+
+
+def run_inference(
+    *,
+    model_path: str,
+    data_path: str,
+    prompt_length: int = 4096,
+    response_length: int = 65536,
+    temperature: float = 0.8,
+    top_p: float = 0.9,
+    n: int = 1,
+    max_samples: int = -1,
+    engine: str = "vllm",
+    nnodes: int = 1,
+    n_gpus_per_node: int = 8,
+    tensor_parallel_size: int = 4,
+    gateway_count: int = 1,
+    max_concurrent_sessions: int = 2,
+    completion_timeout: float = 600.0,
+    tool_parser: str | None = None,
+    agent_config_path: str | None = None,
+    runner: str = "uniagent",
+    tool_image: str | None = None,
+    run_timeout: int = 7200,
+) -> dict[str, Any]:
+    """Run parallel SWE-agent inference via the blackbox framework; returns stats, mean_score, per_sample_scores."""
+    if runner == "mini_swe":
+        if mini_swe_agent_runner is None:
+            raise ImportError("mini-swe-agent is required for --runner mini_swe. Install with: pip install mini-swe-agent")
+        _agent_runner = partial(
+            mini_swe_agent_runner,
+            tool_image=tool_image or "swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest",
+            run_timeout=run_timeout,
+        )
+    elif runner == "claude_code":
+        _agent_runner = partial(
+            claude_code_runner,
+            tool_image=tool_image or "claude-code-tool:latest",
+            run_timeout=run_timeout,
+        )
+    else:
+        _agent_runner = swe_agent_runner  # any other runner value falls back to swe_agent_runner
+
+    if not ray.is_initialized():
+        ray.init()
+
+    # 1. Init Hydra config
+    config = _init_hydra_config(
+        model_path=model_path,
+        engine=engine,
+        prompt_length=prompt_length,
+        response_length=response_length,
+        temperature=temperature,
+        top_p=top_p,
+        n=n,
+        nnodes=nnodes,
+        n_gpus_per_node=n_gpus_per_node,
+        tensor_parallel_size=tensor_parallel_size,
+    )
+
+    # 2. Load dataset
+    samples = load_swe_dataset(data_path, max_samples=max_samples)
+    logger.info(
+        "Loaded %d samples, %d rollout(s) each, runner=%s, gateway_count=%d, max_concurrent_sessions=%d",
+        len(samples),
+        n,
+        runner,
+        gateway_count,
+        max_concurrent_sessions,
+    )
+
+    if not samples:
+        raise ValueError("No samples to process")
+
+    # 3. Create LLM server
+    logger.info("Initializing LLM server manager...")
+    llm_server_manager = LLMServerManager.create(config=config)
+
+    # 4. Create GatewayServingRuntime
+    logger.info("Using tool_parser=%r", tool_parser)
+
+    llm_client = llm_server_manager.get_client()
+    gateway_actor_kwargs = {
+        "tokenizer": hf_tokenizer(os.path.expanduser(model_path)),
+        "base_sampling_params": {"temperature": temperature, "top_p": top_p, "max_tokens": response_length},
+    }
+    if tool_parser:
+        gateway_actor_kwargs["tool_parser_name"] = tool_parser
+
+    gateway_runtime = GatewayServingRuntime(
+        llm_client=llm_client,
+        gateway_count=gateway_count,
+        gateway_actor_kwargs=gateway_actor_kwargs,
+    )
+
+    # 5. Create RewardLoopWorker for compute_score
+    from verl.experimental.reward_loop.reward_loop import RewardLoopWorker
+    reward_worker = ray.remote(RewardLoopWorker).remote(config, None)
+
+    # 6. Create framework
+    framework = SWEAgentFramework(
+        session_runtime=gateway_runtime,
+        agent_runner=_agent_runner,
+        replay_buffer=_MockReplayBuffer(),
+        rollout_config={"n": n, "val_kwargs": {"n": n}},
+        completion_timeout=completion_timeout,
+        wait_for_completion_after_agent_run=True,
+        max_concurrent_sessions=max_concurrent_sessions,
+        reward_loop_worker_handles=[reward_worker],
+    )
+
+    # 7. Build batch data and run
+    _tools_kwargs_list = []
+    for sample in samples:
+        tk = (sample.get("extra_info") or {}).get("tools_kwargs", {})
+        if runner == "uniagent" and agent_config_path:
+            tk["agent_config_path"] = agent_config_path
+        tk["model_path"] = os.path.expanduser(model_path)  # note: writes into the sample's own dict when present
+        _tools_kwargs_list.append(tk)
+
+    from tensordict import TensorDict
+    from verl.utils import tensordict_utils as _tu
+
+    raw_prompts = [sample["prompt"] for sample in samples]
+    uids = [str(uuid4()) for _ in samples]
+    td = TensorDict({"uid": uids, "global_steps": [0] * len(samples)}, batch_size=[len(samples)])
+    _tu.assign_non_tensor_stack(td, "raw_prompt", raw_prompts)
+    _tu.assign_non_tensor_stack(td, "tools_kwargs", _tools_kwargs_list)
+    _tu.assign_non_tensor_stack(td, "data_source", [sample["data_source"] for sample in samples])
+    _tu.assign_non_tensor_stack(td, "reward_model", [sample["reward_model"] for sample in samples])
+
+    batch = DataProto(batch=td, meta_info={}).repeat(n)
+
+    size_divisor = gateway_count  # pad the batch to a multiple of the gateway count
+    batch_padded, pad_size = pad_dataproto_to_divisor(batch, size_divisor)
+    logger.info("Starting %d agent session(s)...", len(batch_padded))
+
+    _tq_store: dict[str, Any] = {}  # in-memory capture of what the stubs below receive
+
+    async def _dummy_kv_put(key, partition_id=None, tag=None, **kwargs):
+        _tq_store[key] = tag
+
+    async def _dummy_kv_batch_put(keys=None, fields=None, tags=None, partition_id=None, **kwargs):
+        for i, key in enumerate(keys):
+            _tq_store[key] = {"fields": fields, "tag": tags[i] if tags else None}
+
+    # Replace the tq module's put functions with the local stubs so writes land in _tq_store.
+    _tq_mock.async_kv_put = _dummy_kv_put
+    _tq_mock.async_kv_batch_put = _dummy_kv_batch_put
+
+    async def _generate():
+        return await framework.generate_sequences(batch_padded.batch)
+
+    try:
+        stats = asyncio.run(_generate())
+    except RuntimeError as e:  # only RuntimeError is tolerated; scoring then uses whatever was captured
+        logger.warning("generate_sequences failed: %s", e)
+        stats = {}
+
+    # 8. Collect scores
+    uid_to_sample_idx = {uid: i for i, uid in enumerate(uids)}
+    per_sample_scores = [0.0] * len(samples)
+    sample_trajectory_counts = [0] * len(samples)
+    for key, value in _tq_store.items():
+        if not isinstance(value, dict) or "fields" not in value:  # skip entries stored by _dummy_kv_put
+            continue
+        fields = value["fields"]
+        rm_scores = fields.get("rm_scores", None)
+        if rm_scores is None:
+            continue
+        # Key format: {uid}_{session_index}_{index}
+        uid = key.rsplit("_", 2)[0]
+        sample_idx = uid_to_sample_idx.get(uid)
+        if sample_idx is None:
+            continue
+        score = float(rm_scores.float()[-1, -1].item())  # last element of the last row
+        per_sample_scores[sample_idx] += score
+        sample_trajectory_counts[sample_idx] += 1
+
+    for i in range(len(samples)):
+        if sample_trajectory_counts[i] > 0:
+            per_sample_scores[i] /= sample_trajectory_counts[i]  # sum -> mean over captured trajectories
+
+    resolved_count = sum(1 for s in per_sample_scores if s > 0)  # resolved = mean score above zero
+    overall_mean = float(np.mean(per_sample_scores)) if per_sample_scores else 0.0
+    logger.info(
+        "Resolved %d / %d samples (%.2f%%), mean score: %.4f",
+        resolved_count, len(samples), 100.0 * resolved_count / max(len(samples), 1), overall_mean,
+    )
+
+    # 9. Cleanup
+    asyncio.run(gateway_runtime.shutdown())
+
+    return {
+        "stats": stats,
+        "mean_score": overall_mean,
+        "per_sample_scores": per_sample_scores,
+    }
+
+
+# =====================================================================
+# Helpers
+# =====================================================================
+
+
+def _init_hydra_config(
+    *,
+    model_path: str,
+    engine: str,
+    prompt_length: int,
+    response_length: int,
+    temperature: float,
+    top_p: float,
+    n: int,
+    nnodes: int,
+    n_gpus_per_node: int,
+    tensor_parallel_size: int,
+) -> Any:
+    """Compose the ``parallel_infer`` Hydra config and overlay the given model/rollout settings."""
+    from hydra import compose, initialize_config_dir
+    from omegaconf import OmegaConf
+
+    # The config directory is resolved against the current working directory.
+    search_dir = os.path.abspath("examples/swe_agent_blackbox/config")
+    with initialize_config_dir(config_dir=search_dir, version_base=None):
+        config = compose(config_name="parallel_infer")
+
+    rollout_cfg = config.actor_rollout_ref.rollout
+    config.actor_rollout_ref.model.path = os.path.expanduser(model_path)
+    rollout_cfg.name = engine
+    rollout_cfg.mode = "async"
+    rollout_cfg.prompt_length = prompt_length
+    rollout_cfg.response_length = response_length
+    rollout_cfg.max_model_len = prompt_length + response_length + 1024
+    rollout_cfg.n = n
+    rollout_cfg.tensor_model_parallel_size = tensor_parallel_size
+    rollout_cfg.gpu_memory_utilization = float(os.getenv("ROLLOUT_GPU_MEM_UTIL", "0.5"))
+    rollout_cfg.temperature = rollout_cfg.val_kwargs.temperature = temperature
+    rollout_cfg.top_p = rollout_cfg.val_kwargs.top_p = top_p
+    rollout_cfg.calculate_log_probs = True
+    rollout_cfg.multi_turn.max_assistant_turns = 100
+    rollout_cfg.multi_turn.max_parallel_calls = 1
+    rollout_cfg.nnodes = config.trainer.nnodes = nnodes
+    rollout_cfg.n_gpus_per_node = config.trainer.n_gpus_per_node = n_gpus_per_node
+
+    reward_fn_cfg = config.reward.custom_reward_function
+    reward_fn_cfg.path = "pkg://examples.swe_agent_blackbox.reward"
+    reward_fn_cfg.name = "compute_score"
+    config.reward.num_workers = 1
+
+    # Struct mode is lifted only while these two keys are set, then restored.
+    OmegaConf.set_struct(rollout_cfg, False)
+    rollout_cfg.enable_sleep_mode = False
+    rollout_cfg.enforce_eager = os.getenv("ROLLOUT_ENFORCE_EAGER", "0") == "1"
+    OmegaConf.set_struct(rollout_cfg, True)
+    return config
+
+
+# =====================================================================
+# CLI entry point
+# =====================================================================
+
+
+def main():
+    """Parse the command-line options and call run_inference with them."""
+    parser = argparse.ArgumentParser(description="SWE-Agent Blackbox Parallel Inference")
+    parser.add_argument("--data-path", type=str, default="~/data/swe_agent/swe_bench_verified.parquet")
+    parser.add_argument("--model-path", "--model", type=str, default="~/models/Qwen3-Coder-30B-A3B-Instruct")
+    parser.add_argument("--max-turns", type=int, default=100)
+    parser.add_argument("--prompt-length", type=int, default=4096)
+    parser.add_argument("--response-length", type=int, default=65536)
+    parser.add_argument("--temperature", type=float, default=0.8)
+    parser.add_argument("--top-p", type=float, default=0.9)
+    parser.add_argument("--n", type=int, default=1)
+    parser.add_argument("--max-samples", type=int, default=-1)
+    parser.add_argument("--engine", type=str, default="vllm", choices=["vllm", "sglang"])
+    parser.add_argument("--nnodes", type=int, default=1)
+    parser.add_argument("--n-gpus-per-node", type=int, default=8)
+    parser.add_argument("--tensor-parallel-size", "--tp", type=int, default=4)
+    parser.add_argument("--gateway-count", type=int, default=1)
+    parser.add_argument("--max-concurrent-sessions", type=int, default=2)
+    parser.add_argument("--tool-parser", type=str, default="qwen3_coder")
+    parser.add_argument("--tool-image", type=str, default=None)
+    parser.add_argument("--run-timeout", type=int, default=7200)
+    parser.add_argument(
+        "--runner", type=str, default="uniagent", choices=["uniagent", "mini_swe", "claude_code"],
+        help="Agent runner: 'uniagent', 'mini_swe', or 'claude_code'.",
+    )
+    parser.add_argument(
+        "--agent-config-path", type=str,
+        default="examples/swe_agent_blackbox/config/agent_config.yaml",
+        help="Path to agent config YAML.",
+    )
+    args = parser.parse_args()
+
+    os.environ["SWE_AGENT_MAX_TURNS"] = str(args.max_turns)  # not a run_inference argument; exported via the environment
+    run_inference(
+        model_path=args.model_path,
+        data_path=args.data_path,
+        prompt_length=args.prompt_length,
+        response_length=args.response_length,
+        temperature=args.temperature,
+        top_p=args.top_p,
+        n=args.n,
+        max_samples=args.max_samples,
+        engine=args.engine,
+        nnodes=args.nnodes,
+        n_gpus_per_node=args.n_gpus_per_node,
+        tensor_parallel_size=args.tensor_parallel_size,
+        gateway_count=args.gateway_count,
+        max_concurrent_sessions=args.max_concurrent_sessions,
+        tool_parser=args.tool_parser,
+        agent_config_path=args.agent_config_path,
+        runner=args.runner,
+        tool_image=args.tool_image,
+        run_timeout=args.run_timeout,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/blackbox_recipes/mini_swe_agent/reward.py b/examples/blackbox_recipes/mini_swe_agent/reward.py
new file mode 100644
index 00000000..61da218b
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/reward.py
@@ -0,0 +1,74 @@
+"""Reward utilities for the blackbox SWE-agent recipe.
+
+Contains:
+- build_reward_context: extract reward metadata from tools_kwargs; read eval_timeout from SWE_AGENT_EVAL_TIMEOUT
+- compute_score: thin reward function that reads reward_score from extra_info
+- evaluate_in_env: run reward evaluation in Docker env (shared by both runners)
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def build_reward_context(tools_kwargs: dict) -> tuple[dict[str, Any], int]:
+    """Build reward metadata from *tools_kwargs*; eval_timeout comes from SWE_AGENT_EVAL_TIMEOUT (default 600)."""
+    reward_config = tools_kwargs.get("reward") or {}  # tolerate an explicit None, not just a missing key
+    metadata = {
+        "data_source": reward_config.get("name") or "unknown",
+        "reward_model": reward_config.get("metadata") or {},  # consumers call .get() on this value
+    }
+    eval_timeout = int(os.environ.get("SWE_AGENT_EVAL_TIMEOUT", "600"))
+    return metadata, eval_timeout
+
+
+def compute_score(data_source: str, solution_str: str, ground_truth: str, extra_info=None) -> dict:
+    """Return ``{"score": x}`` where x is ``extra_info["reward_score"]`` as a float, or 0.0 when it is absent."""
+    if not extra_info or "reward_score" not in extra_info:
+        return {"score": 0.0}
+    precomputed = extra_info["reward_score"]
+    return {"score": float(precomputed)}
+
+
+def _get_reward_spec(data_source: str):
+    """Return the reward spec class registered for *data_source*; raise ValueError if none can be found."""
+    from uni_agent.reward.registry import REWARD_SPEC_REGISTRY, _load_reward_spec_module
+
+    if data_source not in REWARD_SPEC_REGISTRY:
+        _load_reward_spec_module(data_source)  # give the registry a chance to register it
+    spec_cls = REWARD_SPEC_REGISTRY.get(data_source)
+    if spec_cls is not None:
+        return spec_cls
+    raise ValueError(f"Unknown data_source: {data_source}. Available: {list(REWARD_SPEC_REGISTRY.keys())}")
+
+
+async def evaluate_in_env(
+    env,
+    metadata: dict[str, Any],
+    eval_timeout: int = 600,
+) -> tuple[float, dict]:
+    """Evaluate the reward for *env* with the spec selected by ``metadata["data_source"]``.
+
+    The spec receives ``metadata["reward_model"]`` (its ``ground_truth`` entry when present).
+    Returns ``(score, eval_result)``: score is 1.0 when the spec reports resolved, else 0.0.
+    """
+    source_name = metadata.get("data_source", "unknown")
+    reward_payload = metadata.get("reward_model", {})
+
+    reward_spec_cls = _get_reward_spec(source_name)
+    # Prefer the nested ground-truth record; otherwise hand over the whole reward_model mapping.
+    spec_kwargs = {
+        "run_id": "swe_bb_eval",
+        "metadata": reward_payload.get("ground_truth", reward_payload),
+        "env": env,
+        "eval_timeout": eval_timeout,
+    }
+    reward_spec = reward_spec_cls(**spec_kwargs)
+    is_resolved, eval_result = await reward_spec.compute_reward()
+    if is_resolved:
+        return 1.0, eval_result
+    return 0.0, eval_result
diff --git a/examples/blackbox_recipes/mini_swe_agent/run_agent.py b/examples/blackbox_recipes/mini_swe_agent/run_agent.py
new file mode 100644
index 00000000..c5a4b165
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/run_agent.py
@@ -0,0 +1,106 @@
+#!/opt/mini-swe-agent/bin/python
+"""Run mini-swe-agent inside the sandbox.
+
+Input:  task config JSON from **stdin**
+    - task: str — the issue description for the agent to solve
+    - gateway_url: str — LLM gateway endpoint (tunnel URL for OpenYuanRong sandbox)
+    - agent: dict — agent config (e.g. step_limit)
+
+Output: agent result JSON to **stdout**, or error JSON on failure
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+
+DEFAULT_ACTION_TIMEOUT = 600
+
+
+def _fail(msg: str, exit_status: str = "error") -> None:
+    """Write an error result (empty submission) as one JSON line to stdout; returns without exiting."""
+    sys.stdout.write(json.dumps({"exit_status": exit_status, "submission": "", "error": msg}))
+    sys.stdout.write("\n")
+    sys.stdout.flush()
+
+
+def main() -> None:
+    """Read the task config from stdin, run mini-swe-agent against the gateway, print one JSON result line."""
+    try:
+        # 1. Read task config from stdin
+        config = json.load(sys.stdin)
+        task = config["task"]
+        gateway_url = config["gateway_url"]
+
+        # 2. Load swebench defaults
+        from minisweagent.config import builtin_config_dir, get_config_from_spec
+        swebench_cfg = get_config_from_spec(str(builtin_config_dir / "benchmarks" / "swebench.yaml"))
+
+        # 3. Create LocalEnvironment from the swebench environment defaults, overriding the action timeout
+        from minisweagent.environments.local import LocalEnvironment
+
+        env_cfg = dict(swebench_cfg.get("environment", {}))
+        env_cfg.pop("environment_class", None)
+        env_cfg["timeout"] = DEFAULT_ACTION_TIMEOUT
+        env_cfg.setdefault("env", {})
+        env_cfg["env"].setdefault("GIT_PAGER", "cat")
+        for key in ("image", "container_timeout", "run_args", "executable", "pull_timeout",
+                    "forward_env", "interpreter"):
+            env_cfg.pop(key, None)  # presumably container-only options LocalEnvironment rejects — TODO confirm
+        env = LocalEnvironment(**env_cfg)
+
+        # 4. Create LitellmModel pointing at gateway (model name/kwargs from swebench.yaml are discarded)
+        from minisweagent.models.litellm_model import LitellmModel
+
+        model_defaults = dict(swebench_cfg.get("model", {}))
+        model_defaults.pop("model_name", None)
+        model_defaults.pop("model_kwargs", None)
+        model_cfg = model_defaults
+        model_cfg.update({
+            "model_name": "openai/default",
+            "model_kwargs": {
+                "api_base": gateway_url,
+                "api_key": "not-needed",
+                "drop_params": True,
+            },
+            "cost_tracking": "ignore_errors",
+        })
+        model = LitellmModel(**model_cfg)
+
+        # 5. Create DefaultAgent (per-task "agent" overrides take precedence over the swebench defaults)
+        from minisweagent.agents.default import DefaultAgent
+
+        agent_defaults = dict(swebench_cfg.get("agent", {}))
+        agent_overrides = config.get("agent", {})
+        agent_defaults.update(agent_overrides)
+        agent_cfg = agent_defaults
+        step_limit = agent_cfg.get("step_limit", 100)  # fall back to 100 when neither source sets it
+        agent_cfg["step_limit"] = step_limit
+        agent = DefaultAgent(model, env, **agent_cfg)
+
+        # 6. Run agent; an exception raised by the run is reported in-band instead of aborting the script
+        try:
+            info = agent.run(task=task)
+        except Exception as e:
+            info = {"exit_status": type(e).__name__, "submission": str(e)}
+
+        # 7. Write result to stdout
+        result = {
+            "exit_status": info.get("exit_status", "unknown"),
+            "submission": info.get("submission", ""),
+            "model_stats": {
+                "instance_cost": agent.cost,
+                "api_calls": agent.n_calls,
+            },
+        }
+        sys.stdout.write(json.dumps(result, ensure_ascii=False))
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+
+    except Exception as e:  # any other failure above is reported as an error JSON line
+        _fail(str(e), exit_status=type(e).__name__)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py b/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
new file mode 100644
index 00000000..b03dc7d7
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
@@ -0,0 +1,61 @@
+"""Ray-based subprocess runner for agent_runner execution.
+
+Launches agent_runner in a separate Ray worker process to prevent blocking
+operations (sleep, sync I/O, etc.) from stalling the framework's event loop.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import Any
+
+import ray
+
+from uni_agent.trainer.framework.types import SessionHandle
+
+logger = logging.getLogger(__name__)
+
+
+class _StubSessionRuntime:
+    """Stand-in session runtime that only records the reward_info passed to complete_session."""
+
+    def __init__(self):
+        self.reward_info: dict[str, Any] | None = None  # stays None until complete_session is called
+
+    async def complete_session(self, session_id: str, reward_info: dict[str, Any] | None = None):
+        self.reward_info = reward_info  # session_id is accepted but not used
+
+
+@ray.remote(num_cpus=0)
+def remote_agent_run(
+    agent_runner_fqn: str,
+    raw_prompt,
+    session_id: str,
+    base_url: str,
+    sample_index: int,
+    runner_kwargs: dict,
+) -> dict[str, Any] | None:
+    """Load the agent runner named by *agent_runner_fqn* and run it to completion on a new event loop.
+
+    Returns the reward_info the runner passed to ``complete_session`` (None if it never called it).
+    """
+    from verl.utils.import_utils import load_class_from_fqn
+
+    runner_fn = load_class_from_fqn(agent_runner_fqn)
+    capture = _StubSessionRuntime()
+    session = SessionHandle(session_id=session_id, base_url=base_url)
+
+    async def _drive():
+        try:
+            await runner_fn(
+                raw_prompt=raw_prompt, session=session, sample_index=sample_index,
+                session_runtime=capture, **runner_kwargs,
+            )
+        except Exception as e:
+            logger.error("remote_agent_run failed: session_id=%s, sample=%d, error=%s",
+                         session_id, sample_index, e, exc_info=True)
+            raise
+        return capture.reward_info
+
+    return asyncio.run(_drive())
diff --git a/examples/blackbox_recipes/sandbox/sandbox.py b/examples/blackbox_recipes/sandbox/sandbox.py
new file mode 100644
index 00000000..fb21ac94
--- /dev/null
+++ b/examples/blackbox_recipes/sandbox/sandbox.py
@@ -0,0 +1,10 @@
+"""OpenYuanRong (AKernel) remote sandbox command execution.
+
+Uses ``akernel_sdk.Sandbox`` with sidecar ``Mount`` to inject the
+mini-swe-agent tool image.  Supports upstream tunnel so the agent
+inside the sandbox can reach the gateway via ``http://127.0.0.1:<proxy_port>``.
+"""
+
+# TODO: not implemented yet — this module currently contains only the description above.
+
+
diff --git a/examples/blackbox_recipes/scripts/build_tool.sh b/examples/blackbox_recipes/scripts/build_tool.sh
new file mode 100755
index 00000000..e5158629
--- /dev/null
+++ b/examples/blackbox_recipes/scripts/build_tool.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Build a SWE blackbox sidecar tool image.
+# The Dockerfile and build context are taken from the directory that contains this script.
+# Usage:
+#   bash examples/swe_agent_blackbox/build_tool.sh
+#   bash examples/swe_agent_blackbox/build_tool.sh --tool claude_code
+#   bash examples/swe_agent_blackbox/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
+#   bash examples/swe_agent_blackbox/build_tool.sh --npm-registry https://registry.npmmirror.com
+#   bash examples/swe_agent_blackbox/build_tool.sh --tool-version latest
+#   bash examples/swe_agent_blackbox/build_tool.sh --registry reg.antgroup-inc.cn/myrepo
+# Env overrides: TOOL_KIND, TOOL_TAG, TOOL_VERSION, TOOL_IMAGE, PIP_INDEX_URL, NPM_REGISTRY.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+TOOL_KIND="${TOOL_KIND:-mini_swe}"
+IMAGE_TAG="${TOOL_TAG:-latest}"
+TOOL_VERSION="${TOOL_VERSION:-latest}"
+
+# Parse args (command-line flags take precedence over the environment defaults above)
+REGISTRY=""
+PIP_INDEX_URL="${PIP_INDEX_URL:-}"
+NPM_REGISTRY="${NPM_REGISTRY:-}"
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --tool) TOOL_KIND="$2"; shift 2 ;;
+        --registry) REGISTRY="$2"; shift 2 ;;
+        --pip-index) PIP_INDEX_URL="$2"; shift 2 ;;
+        --npm-registry) NPM_REGISTRY="$2"; shift 2 ;;
+        --tool-version) TOOL_VERSION="$2"; shift 2 ;;
+        *) echo "Unknown arg: $1" >&2; exit 1 ;;
+    esac
+done
+
+BUILD_ARGS=()  # may stay empty; expanded below in a form that is safe under `set -u` on bash < 4.4
+DOCKERFILE="${SCRIPT_DIR}/Dockerfile.mini-swe-agent-tool"
+if [[ "${TOOL_KIND}" == "claude" ]]; then
+    TOOL_KIND="claude_code"  # accept "claude" as a shorthand
+fi
+if [[ "${TOOL_KIND}" == "claude_code" ]]; then
+    IMAGE_NAME="${TOOL_IMAGE:-claude-code-tool}"
+    DOCKERFILE="${SCRIPT_DIR}/Dockerfile.claude-code-tool"
+    BUILD_ARGS+=(--build-arg "TOOL_VERSION=${TOOL_VERSION}")
+    if [[ -n "${NPM_REGISTRY}" ]]; then
+        BUILD_ARGS+=(--build-arg "NPM_REGISTRY=${NPM_REGISTRY}")
+    fi
+elif [[ "${TOOL_KIND}" == "mini_swe" ]]; then
+    IMAGE_NAME="${TOOL_IMAGE:-mini-swe-agent-tool}"
+    if [[ -n "${PIP_INDEX_URL}" ]]; then
+        BUILD_ARGS+=(--build-arg PIP_INDEX_URL="${PIP_INDEX_URL}")
+    fi
+else
+    echo "Unknown tool: ${TOOL_KIND}; expected mini_swe or claude_code" >&2
+    exit 1
+fi
+
+echo "==> Building ${TOOL_KIND} tool image: ${IMAGE_NAME}:${IMAGE_TAG}"
+docker build \
+    -f "${DOCKERFILE}" \
+    -t "${IMAGE_NAME}:${IMAGE_TAG}" \
+    ${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"} \
+    "${SCRIPT_DIR}/"
+
+if [[ -n "${REGISTRY}" ]]; then
+    FULL_TAG="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
+    echo "==> Tagging and pushing: ${FULL_TAG}"
+    docker tag "${IMAGE_NAME}:${IMAGE_TAG}" "${FULL_TAG}"
+    docker push "${FULL_TAG}"
+    echo "    Pushed."
+fi
+
+echo ""
+echo "Tool image ready: ${IMAGE_NAME}:${IMAGE_TAG}"
+if [[ -n "${REGISTRY}" ]]; then
+    echo "  Remote sandbox: ${FULL_TAG}"
+fi
diff --git a/examples/blackbox_recipes/scripts/run_infer.sh b/examples/blackbox_recipes/scripts/run_infer.sh
new file mode 100755
index 00000000..d5703aa6
--- /dev/null
+++ b/examples/blackbox_recipes/scripts/run_infer.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Parallel inference for the blackbox SWE-agent recipe.
+# Every setting below can be overridden through the environment variable of the same name.
+# Usage:
+#   bash examples/swe_agent_blackbox/scripts/run_infer.sh
+
+set -euo pipefail
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3.5-9B}"
+DATA_PATH="${DATA_PATH:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
+
+# ── Inference parameters ─────────────────────────────────────────────────
+MAX_SAMPLES="${MAX_SAMPLES:--1}"  # default is -1; presumably "no limit" — TODO confirm in parallel_infer.py
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-65536}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+TOP_P="${TOP_P:-1.0}"
+N="${N:-8}"
+ENGINE="${ENGINE:-vllm}"
+TP="${TP:-4}"
+N_GPUS_PER_NODE="${N_GPUS_PER_NODE:-8}"
+GATEWAY_COUNT="${GATEWAY_COUNT:-1}"
+MAX_CONCURRENT_SESSIONS="${MAX_CONCURRENT_SESSIONS:-2}"
+
+# ── Agent parameters ─────────────────────────────────────────────────────
+RUNNER="${RUNNER:-uniagent}"
+AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
+export SWE_AGENT_MAX_TURNS="${SWE_AGENT_MAX_TURNS:-100}"  # also forwarded as --max-turns below
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"  # read from the environment by reward.build_reward_context
+SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-}"  # NOTE(review): when empty, --tool-image "" is still passed — confirm "" is treated like unset
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+
+# ── Logging ──────────────────────────────────────────────────────────────
+export VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
+export ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.5}"  # read by parallel_infer.py via os.getenv
+
+echo "=== SWE-Agent Blackbox Inference ==="
+echo "Model: ${MODEL_PATH}"
+echo "Data:  ${DATA_PATH}"
+echo "Max samples: ${MAX_SAMPLES}"
+echo "Engine: ${ENGINE} (TP=${TP})"
+echo "Runner: ${RUNNER}"
+echo "Gateway count: ${GATEWAY_COUNT}"
+echo "Max concurrent sessions: ${MAX_CONCURRENT_SESSIONS}"
+echo "====================================="
+
+python examples/swe_agent_blackbox/parallel_infer.py \
+    --model-path "${MODEL_PATH}" \
+    --data-path "${DATA_PATH}" \
+    --max-samples "${MAX_SAMPLES}" \
+    --prompt-length "${PROMPT_LENGTH}" \
+    --response-length "${RESPONSE_LENGTH}" \
+    --temperature "${TEMPERATURE}" \
+    --top-p "${TOP_P}" \
+    --n "${N}" \
+    --engine "${ENGINE}" \
+    --tensor-parallel-size "${TP}" \
+    --max-turns "${SWE_AGENT_MAX_TURNS}" \
+    --runner "${RUNNER}" \
+    --agent-config-path "${AGENT_CONFIG_PATH}" \
+    --n-gpus-per-node "${N_GPUS_PER_NODE}" \
+    --gateway-count "${GATEWAY_COUNT}" \
+    --max-concurrent-sessions "${MAX_CONCURRENT_SESSIONS}" \
+    --tool-image "${SWE_AGENT_TOOL_IMAGE}" \
+    --run-timeout "${SWE_AGENT_RUN_TIMEOUT}"
diff --git a/examples/blackbox_recipes/scripts/run_train.sh b/examples/blackbox_recipes/scripts/run_train.sh
new file mode 100755
index 00000000..cf08005d
--- /dev/null
+++ b/examples/blackbox_recipes/scripts/run_train.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# Training launch script for the blackbox SWE-agent recipe.
+#
+# Uses GRPO + AgentFrameworkRolloutAdapter with reward computed in-process
+# by the agent runner, then passed through the reward worker's compute_score.
+#
+# Usage:
+#   bash examples/swe_agent_blackbox/scripts/run_train.sh
+#
+# All configurable via environment variables (see defaults below).
+
+set -euo pipefail
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3-Coder-30B-A3B-Instruct}"
+TRAIN_DATA="${TRAIN_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
+VAL_DATA="${VAL_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"  # same default file as TRAIN_DATA
+
+# ── Hardware ─────────────────────────────────────────────────────────────
+NNODES="${NNODES:-1}"
+NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
+
+# ── Training parameters ─────────────────────────────────────────────────
+TRAIN_BATCH_SIZE="${TRAIN_BATCH_SIZE:-128}"
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
+ACTOR_LR="${ACTOR_LR:-1e-6}"
+TOTAL_EPOCHS="${TOTAL_EPOCHS:-10}"
+SAVE_FREQ="${SAVE_FREQ:-10}"
+TEST_FREQ="${TEST_FREQ:-10}"
+
+# ── Rollout parameters ──────────────────────────────────────────────────
+ENGINE="${ENGINE:-vllm}"
+TP="${TP:-4}"
+ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"  # not exported; passed as a config override below
+N="${N:-8}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+
+# ── Agent parameters (RUNNER selects the runner FQN and the default tool image) ──
+RUNNER="${RUNNER:-mini_swe}"
+MAX_TURNS="${MAX_TURNS:-100}"
+AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
+COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
+if [[ "${RUNNER}" == "claude_code" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
+elif [[ "${RUNNER}" == "mini_swe" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
+elif [[ "${RUNNER}" == "uniagent" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE=""  # unused: tool_image/run_timeout are only passed for the other runners
+else
+    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
+    exit 1
+fi
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+RUNNER_ARGS=(
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
+)
+if [[ "${RUNNER}" != "uniagent" ]]; then  # NOTE(review): the leading "+" presumably adds keys absent from the base config — confirm
+    RUNNER_ARGS+=(
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
+    )
+fi
+
+# ── Logging ──────────────────────────────────────────────────────────────
+PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
+EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
+VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
+
+export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"  # MAX_TURNS also sets multi_turn.max_assistant_turns below
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
+export VERL_LOGGING_LEVEL
+
+# ── Environment for NCCL ─────────────────────────────────────────────────
+export NCCL_P2P_DISABLE="${NCCL_P2P_DISABLE:-1}"
+export NCCL_SHM_DISABLE="${NCCL_SHM_DISABLE:-1}"
+
+echo "=== SWE-Agent Blackbox Training ==="
+echo "Model:       ${MODEL_PATH}"
+echo "Train data:  ${TRAIN_DATA}"
+echo "Val data:    ${VAL_DATA}"
+echo "Engine:      ${ENGINE} (TP=${TP})"
+echo "Runner:      ${RUNNER}"
+echo "Batch size:  ${TRAIN_BATCH_SIZE}, N=${N}"
+echo "Epochs:      ${TOTAL_EPOCHS}"
+echo "====================================="
+
+python3 -m verl.trainer.main_ppo_sync \
+    --config-name=swe_agent_blackbox \
+    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    data.train_files="['${TRAIN_DATA}']" \
+    data.val_files="['${VAL_DATA}']" \
+    data.train_batch_size=${TRAIN_BATCH_SIZE} \
+    data.max_prompt_length=${PROMPT_LENGTH} \
+    data.max_response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.name=${ENGINE} \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${TP} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
+    actor_rollout_ref.rollout.n=${N} \
+    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
+    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
+    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.max_model_len=$((PROMPT_LENGTH + RESPONSE_LENGTH + 1024)) \
+    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
+    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
+    actor_rollout_ref.rollout.nnodes=${NNODES} \
+    actor_rollout_ref.rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
+    trainer.nnodes=${NNODES} \
+    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
+    trainer.total_epochs=${TOTAL_EPOCHS} \
+    trainer.save_freq=${SAVE_FREQ} \
+    trainer.test_freq=${TEST_FREQ} \
+    trainer.project_name=${PROJECT_NAME} \
+    trainer.experiment_name=${EXPERIMENT_NAME} \
+    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
+    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
+    "${RUNNER_ARGS[@]}" \
+    "$@"  # extra command-line arguments are forwarded verbatim, after the overrides above
diff --git a/examples/blackbox_recipes/scripts/run_train_megatron_async.sh b/examples/blackbox_recipes/scripts/run_train_megatron_async.sh
new file mode 100755
index 00000000..db3a8264
--- /dev/null
+++ b/examples/blackbox_recipes/scripts/run_train_megatron_async.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+# Megatron + TQ fully-async training for the blackbox SWE-agent recipe.
+#
+# Uses FullyAsyncAgentFrameworkRolloutAdapter + SWEAgentFramework with Megatron backend.
+# Data flows through TransferQueue (zero-copy) with ReplayBuffer flow control.
+#
+# Usage:
+#   bash examples/swe_agent_blackbox/scripts/run_train_megatron_async.sh
+#
+# All configurable via environment variables (see defaults below).
+
+set -euo pipefail
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-${HOME}/models/Qwen3.5-9B}"
+TRAIN_DATA="${TRAIN_DATA:-${HOME}/data/swe_agent/swe_rebench_filtered.parquet}"
+VAL_DATA="${VAL_DATA:-${HOME}/data/swe_agent/swe_bench_verified.parquet}"
+RUNTIME_ENV="${RUNTIME_ENV:-}"
+
+# ── Hardware ─────────────────────────────────────────────────────────────
+NNODES_TRAIN="${NNODES_TRAIN:-1}"
+NNODES_ROLLOUT="${NNODES_ROLLOUT:-1}"
+NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
+
+# ── Algorithm ────────────────────────────────────────────────────────────
+CLIP_RATIO_LOW="${CLIP_RATIO_LOW:-0.2}"
+CLIP_RATIO_HIGH="${CLIP_RATIO_HIGH:-0.28}"
+ACTOR_LR="${ACTOR_LR:-1e-6}"
+
+# ── Sequence lengths ─────────────────────────────────────────────────────
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
+MAX_MODEL_LEN=$((PROMPT_LENGTH + RESPONSE_LENGTH))
+
+# ── Rollout parameters ───────────────────────────────────────────────────
+ENGINE="${ENGINE:-vllm}"
+GEN_TP="${GEN_TP:-2}"
+N="${N:-8}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
+
+# ── Megatron training parallelism ────────────────────────────────────────
+TRAIN_TP="${TRAIN_TP:-8}"
+TRAIN_PP="${TRAIN_PP:-1}"
+TRAIN_CP="${TRAIN_CP:-1}"
+OFFLOAD="${OFFLOAD:-True}"
+OPTIMIZER_OFFLOAD_FRACTION="${OFFLOAD_FRACTION:-1.0}"
+USE_MBRIDGE="${USE_MBRIDGE:-True}"
+PPO_MINI_BATCH_SIZE="${PPO_MINI_BATCH_SIZE:-16}"
+
+# ── Agent parameters ─────────────────────────────────────────────────────
+RUNNER="${RUNNER:-mini_swe}"
+MAX_TURNS="${MAX_TURNS:-100}"
+AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
+COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
+if [[ "${RUNNER}" == "claude_code" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
+elif [[ "${RUNNER}" == "mini_swe" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
+elif [[ "${RUNNER}" == "uniagent" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE=""
+else
+    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
+    exit 1
+fi
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+CONDA_ENV="${CONDA_ENV:-testbed}"
+RUNNER_ARGS=(
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
+)
+if [[ "${RUNNER}" != "uniagent" ]]; then
+    RUNNER_ARGS+=(
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.conda_env=${CONDA_ENV}"
+    )
+fi
+
+# ── OpenYuanRong (YR remote sandbox) ─────────────────────────────────────
+OPENYUANRONG_SERVER_ADDRESS="${OPENYUANRONG_SERVER_ADDRESS:-}"
+OPENYUANRONG_TOKEN="${OPENYUANRONG_TOKEN:-}"
+OPENYUANRONG_TUNNEL_SSL_VERIFY="${OPENYUANRONG_TUNNEL_SSL_VERIFY:-0}"
+
+# ── Async training ───────────────────────────────────────────────────────
+TOTAL_ROLLOUT_STEPS="${TOTAL_ROLLOUT_STEPS:-100000}"
+STALENESS_THRESHOLD="${STALENESS_THRESHOLD:-1.0}"
+TRIGGER_SYNC_STEP="${TRIGGER_SYNC_STEP:-4}"
+PARTIAL_ROLLOUT="${PARTIAL_ROLLOUT:-True}"
+
+# ── Logging & checkpointing ──────────────────────────────────────────────
+PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
+EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
+SAVE_FREQ="${SAVE_FREQ:-10}"
+TEST_FREQ="${TEST_FREQ:-10}"
+CKPTS_DIR="${CKPTS_DIR:-checkpoints/${PROJECT_NAME}/${EXPERIMENT_NAME}}"
+
+export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
+export OPENYUANRONG_SERVER_ADDRESS
+export OPENYUANRONG_TOKEN
+export OPENYUANRONG_TUNNEL_SSL_VERIFY
+
+echo "=== SWE-Agent Blackbox Megatron Async Training ==="
+echo "Model:       ${MODEL_PATH}"
+echo "Train data:  ${TRAIN_DATA}"
+echo "Val data:    ${VAL_DATA}"
+echo "Engine:      ${ENGINE} (gen_tp=${GEN_TP}, train_tp=${TRAIN_TP})"
+echo "Runner:      ${RUNNER}"
+echo "Batch:       n=${N}, mini_bsz=${PPO_MINI_BATCH_SIZE}"
+echo "Sequence:    prompt=${PROMPT_LENGTH}, response=${RESPONSE_LENGTH}"
+echo "Nodes:       train=${NNODES_TRAIN}, rollout=${NNODES_ROLLOUT}"
+echo "==================================================="
+
+# ── Compute derived parameters ───────────────────────────────────────────
+ACTOR_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
+INFER_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
+
+RUNTIME_ENV_ARGS=()
+if [ -n "${RUNTIME_ENV}" ]; then
+    RUNTIME_ENV_ARGS=(--runtime-env "${RUNTIME_ENV}")
+fi
+
+# ── Ensure Ray is running ────────────────────────────────────────────────
+TOTAL_GPUS=$(( (NNODES_TRAIN + NNODES_ROLLOUT) * NGPUS_PER_NODE ))
+if ! ray status &>/dev/null; then
+    echo "Starting Ray cluster (${TOTAL_GPUS} GPUs)..."
+    ray start --head --num-gpus="${TOTAL_GPUS}" --disable-usage-stats
+else
+    echo "Ray cluster already running."
+fi
+
+# ── Launch ────────────────────────────────────────────────────────────────
+WORKING_DIR="${WORKING_DIR:-$(pwd)}"
+
+ray job submit --no-wait --working-dir="${WORKING_DIR}" "${RUNTIME_ENV_ARGS[@]}" \
+    -- python3 -m verl.experimental.fully_async_policy.fully_async_main \
+    --config-name=swe_agent_blackbox_megatron_async \
+    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
+    hydra.searchpath=[pkg://verl.trainer.config] \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    data.train_files="['${TRAIN_DATA}']" \
+    data.val_files="['${VAL_DATA}']" \
+    data.max_prompt_length=${PROMPT_LENGTH} \
+    data.max_response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.n=${N} \
+    actor_rollout_ref.rollout.name=${ENGINE} \
+    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
+    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.max_model_len=${MAX_MODEL_LEN} \
+    actor_rollout_ref.rollout.max_num_batched_tokens=${MAX_MODEL_LEN} \
+    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${GEN_TP} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
+    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
+    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
+    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
+    "${RUNNER_ARGS[@]}" \
+    actor_rollout_ref.actor.clip_ratio_low=${CLIP_RATIO_LOW} \
+    actor_rollout_ref.actor.clip_ratio_high=${CLIP_RATIO_HIGH} \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${ACTOR_PPO_MAX_TOKEN_LEN} \
+    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
+    actor_rollout_ref.actor.optim.lr_decay_steps=${TOTAL_ROLLOUT_STEPS} \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=${OPTIMIZER_OFFLOAD_FRACTION} \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.overlap_cpu_optimizer_d2h_h2d=True \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_cpu_offload=True \
+    actor_rollout_ref.actor.megatron.param_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.grad_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.optimizer_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TRAIN_TP} \
+    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
+    actor_rollout_ref.actor.megatron.context_parallel_size=${TRAIN_CP} \
+    actor_rollout_ref.actor.megatron.use_mbridge=${USE_MBRIDGE} \
+    actor_rollout_ref.ref.megatron.param_offload=${OFFLOAD} \
+    actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${TRAIN_TP} \
+    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
+    actor_rollout_ref.ref.megatron.context_parallel_size=${TRAIN_CP} \
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
+    trainer.project_name="${PROJECT_NAME}" \
+    trainer.experiment_name="${EXPERIMENT_NAME}" \
+    trainer.save_freq=${SAVE_FREQ} \
+    trainer.test_freq=${TEST_FREQ} \
+    trainer.default_local_dir="${CKPTS_DIR}" \
+    trainer.nnodes=${NNODES_TRAIN} \
+    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
+    rollout.nnodes=${NNODES_ROLLOUT} \
+    rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
+    rollout.total_rollout_steps=${TOTAL_ROLLOUT_STEPS} \
+    async_training.staleness_threshold=${STALENESS_THRESHOLD} \
+    async_training.trigger_parameter_sync_step=${TRIGGER_SYNC_STEP} \
+    async_training.partial_rollout=${PARTIAL_ROLLOUT} \
+    "$@"
diff --git a/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh b/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh
new file mode 100755
index 00000000..1a0c19d3
--- /dev/null
+++ b/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+# Megatron sync training for the blackbox SWE-agent recipe.
+#
+# Uses main_ppo_sync + Megatron backend with the same blackbox agent infrastructure
+# (AgentFrameworkRolloutAdapter, subprocess_runner, SWEAgentFramework).
+#
+# Usage:
+#   bash examples/swe_agent_blackbox/scripts/run_train_megatron_sync.sh
+#
+# All configurable via environment variables (see defaults below).
+
+set -euo pipefail
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3.5-9B}"
+TRAIN_DATA="${TRAIN_DATA:-$HOME/data/swe_agent/swe_rebench_filtered.parquet}"
+VAL_DATA="${VAL_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
+
+# ── Hardware ─────────────────────────────────────────────────────────────
+NNODES="${NNODES:-1}"
+NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
+
+# ── Training parameters ─────────────────────────────────────────────────
+TRAIN_BATCH_SIZE="${TRAIN_BATCH_SIZE:-128}"
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
+ACTOR_LR="${ACTOR_LR:-1e-6}"
+TOTAL_EPOCHS="${TOTAL_EPOCHS:-10}"
+SAVE_FREQ="${SAVE_FREQ:-10}"
+TEST_FREQ="${TEST_FREQ:-10}"
+PPO_MINI_BATCH_SIZE="${PPO_MINI_BATCH_SIZE:-16}"
+
+# ── Rollout parameters ──────────────────────────────────────────────────
+ENGINE="${ENGINE:-vllm}"
+TP="${TP:-4}"
+ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
+N="${N:-8}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+
+# ── Megatron parallelism ────────────────────────────────────────────────
+TRAIN_TP="${TRAIN_TP:-8}"
+TRAIN_PP="${TRAIN_PP:-1}"
+TRAIN_CP="${TRAIN_CP:-1}"
+OFFLOAD="${OFFLOAD:-true}"
+USE_MBRIDGE="${USE_MBRIDGE:-true}"
+
+# ── Agent parameters ─────────────────────────────────────────────────────
+RUNNER="${RUNNER:-mini_swe}"
+MAX_TURNS="${MAX_TURNS:-100}"
+AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
+COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
+if [[ "${RUNNER}" == "claude_code" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
+elif [[ "${RUNNER}" == "mini_swe" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
+elif [[ "${RUNNER}" == "uniagent" ]]; then
+    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE=""
+else
+    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
+    exit 1
+fi
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+RUNNER_ARGS=(
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
+)
+if [[ "${RUNNER}" != "uniagent" ]]; then
+    RUNNER_ARGS+=(
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
+        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
+    )
+fi
+
+# ── Logging ──────────────────────────────────────────────────────────────
+PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
+EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
+VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
+
+export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
+export VERL_LOGGING_LEVEL
+
+# ── Environment for NCCL ────────────────────────────────────────────────
+export NCCL_P2P_DISABLE="${NCCL_P2P_DISABLE:-1}"
+export NCCL_SHM_DISABLE="${NCCL_SHM_DISABLE:-1}"
+
+echo "=== SWE-Agent Blackbox Megatron Sync Training ==="
+echo "Model:       ${MODEL_PATH}"
+echo "Train data:  ${TRAIN_DATA}"
+echo "Val data:    ${VAL_DATA}"
+echo "Engine:      ${ENGINE} (gen_tp=${TP}, train_tp=${TRAIN_TP})"
+echo "Runner:      ${RUNNER}"
+echo "Batch size:  ${TRAIN_BATCH_SIZE}, N=${N}"
+echo "Sequence:    prompt=${PROMPT_LENGTH}, response=${RESPONSE_LENGTH}"
+echo "==============================================="
+
+python3 -m verl.trainer.main_ppo_sync \
+    --config-name=swe_agent_blackbox_megatron_sync \
+    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
+    hydra.searchpath=[pkg://verl.trainer.config] \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    data.train_files="['${TRAIN_DATA}']" \
+    data.val_files="['${VAL_DATA}']" \
+    data.train_batch_size=${TRAIN_BATCH_SIZE} \
+    data.max_prompt_length=${PROMPT_LENGTH} \
+    data.max_response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.name=${ENGINE} \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${TP} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
+    actor_rollout_ref.rollout.n=${N} \
+    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
+    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
+    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.max_model_len=$((PROMPT_LENGTH + RESPONSE_LENGTH)) \
+    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
+    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} \
+    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TRAIN_TP} \
+    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
+    actor_rollout_ref.actor.megatron.context_parallel_size=${TRAIN_CP} \
+    actor_rollout_ref.actor.megatron.param_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.grad_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.use_mbridge=${USE_MBRIDGE} \
+    actor_rollout_ref.rollout.nnodes=${NNODES} \
+    actor_rollout_ref.rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
+    trainer.nnodes=${NNODES} \
+    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
+    trainer.total_epochs=${TOTAL_EPOCHS} \
+    trainer.save_freq=${SAVE_FREQ} \
+    trainer.test_freq=${TEST_FREQ} \
+    trainer.project_name=${PROJECT_NAME} \
+    trainer.experiment_name=${EXPERIMENT_NAME} \
+    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
+    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
+    "${RUNNER_ARGS[@]}" \
+    "$@"

From a28540a4452be30d2d5284b2c71c7c0b3f3354e9 Mon Sep 17 00:00:00 2001
From: sheng <sheng.gao.dev@gmail.com>
Date: Fri, 26 Jun 2026 14:18:43 +0800
Subject: [PATCH 2/3] feat: add openyuanrong sandbox

---
 examples/blackbox_recipes/sandbox/sandbox.py | 180 ++++++++++++++++++-
 1 file changed, 179 insertions(+), 1 deletion(-)

diff --git a/examples/blackbox_recipes/sandbox/sandbox.py b/examples/blackbox_recipes/sandbox/sandbox.py
index fb21ac94..e6e46a8d 100644
--- a/examples/blackbox_recipes/sandbox/sandbox.py
+++ b/examples/blackbox_recipes/sandbox/sandbox.py
@@ -5,6 +5,184 @@
 inside the sandbox can reach the gateway via ``http://127.0.0.1:<proxy_port>``.
 """
 
-#TODO
+from __future__ import annotations
 
+import asyncio
+import logging
+import os
+from dataclasses import dataclass
+from typing import Any
+from urllib.parse import urlparse
 
+
+@dataclass
+class CommandResult:
+    """Result of a command executed inside a sandbox."""
+
+    stdout: str
+    stderr: str
+    exit_code: int
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_PROXY_PORT = 38197
+
+
+def _configure_akernel_env() -> None:
+    """Map OPENYUANRONG_* env vars to AKERNEL_* before importing akernel_sdk."""
+    server = os.getenv("OPENYUANRONG_SERVER_ADDRESS")
+    token = os.getenv("OPENYUANRONG_TOKEN")
+    tunnel_ssl_verify = os.getenv("OPENYUANRONG_TUNNEL_SSL_VERIFY", "0")
+    if not server or not token:
+        raise ValueError(
+            "OPENYUANRONG_SERVER_ADDRESS and OPENYUANRONG_TOKEN "
+            "environment variables must be set for YR sandbox"
+        )
+    os.environ["AKERNEL_SERVER_ADDRESS"] = server
+    os.environ["AKERNEL_TOKEN"] = token
+    os.environ["TUNNEL_SSL_VERIFY"] = tunnel_ssl_verify
+
+
+def extract_upstream(gateway_url: str) -> str:
+    """Extract host:port from a gateway URL for upstream tunnel config.
+
+    Example: "http://8.92.9.155:40169/sessions/abc/v1" -> "8.92.9.155:40169"
+    """
+    parsed = urlparse(gateway_url)
+    return f"{parsed.hostname}:{parsed.port}"
+
+
+def rewrite_gateway_url(
+    gateway_url: str,
+    proxy_port: int = DEFAULT_PROXY_PORT,
+    *,
+    strip_v1: bool = False,
+) -> str:
+    """Rewrite gateway URL to use the sandbox-internal tunnel.
+
+    Replaces host:port with 127.0.0.1:<proxy_port>, keeps path intact.
+
+    Example:
+        "http://8.92.9.155:40169/sessions/abc/v1"
+        -> "http://127.0.0.1:8766/sessions/abc/v1"
+    """
+    parsed = urlparse(gateway_url)
+    path = parsed.path.removesuffix("/v1") if strip_v1 else parsed.path
+    return f"http://127.0.0.1:{proxy_port}{path}"
+
+
+class YRSandbox:
+    """Command execution via OpenYuanRong (AKernel) remote sandbox."""
+
+    def __init__(self, sandbox: Any) -> None:
+        self._sandbox = sandbox
+
+    @property
+    def sandbox_id(self) -> str:
+        return getattr(self._sandbox, "sandbox_id", "unknown")
+
+
+    @classmethod
+    async def create(
+        cls,
+        *,
+        image: str,
+        sidecar_image: str,
+        upstream: str = "",
+        proxy_port: int = DEFAULT_PROXY_PORT,
+        env: dict[str, str] | None = None,
+        cpu: int = 1000,
+        memory: int = 2048,
+        cpu_limit: int = 4000,
+        mem_limit: int = 8192,
+        idle_timeout: int = 7200,
+        sidecar_target: str = "/opt/mini-swe-agent",
+        max_retries: int = 5,
+        **sandbox_kwargs: Any,
+    ) -> "YRSandbox":
+        """Create an OpenYuanRong sandbox with sidecar tool mounted.
+
+        The sidecar image is mounted at ``sidecar_target`` inside the
+        sandbox via ``akernel_sdk.Mount``.
+
+        If ``upstream`` is provided, a tunnel is set up so the sandbox can
+        reach the local gateway via ``http://127.0.0.1:<proxy_port>``.
+        """
+        _configure_akernel_env()
+        from akernel_sdk import Mount, Sandbox
+
+        sb_kwargs: dict[str, Any] = {
+            "image": image,
+            "cpu": cpu,
+            "memory": memory,
+            "cpu_limit": cpu_limit,
+            "mem_limit": mem_limit,
+            "idle_timeout": idle_timeout,
+            "mounts": [
+                Mount(target=sidecar_target, image_url=sidecar_image),
+            ],
+        }
+        if upstream:
+            sb_kwargs["upstream"] = upstream
+            sb_kwargs["proxy_port"] = proxy_port
+        if env:
+            sb_kwargs["env"] = env
+        sb_kwargs.update(sandbox_kwargs)
+
+        logger.info(
+            "Creating YR sandbox (image=%s, cpu=%d, memory=%d, sidecar=%s:%s, upstream=%s)",
+            image, cpu, memory, sidecar_image, sidecar_target, upstream or "none",
+        )
+        last_error: Exception | None = None
+        for retry in range(max_retries):
+            sandbox = None
+            try:
+                sandbox = await asyncio.to_thread(lambda: Sandbox(**sb_kwargs))
+                logger.info("YR sandbox created: %s", getattr(sandbox, "sandbox_id", "?"))
+                return cls(sandbox=sandbox)
+            except Exception as exc:
+                last_error = exc
+                sandbox_id = getattr(sandbox, "sandbox_id", None)
+                logger.critical(
+                    "Failed to create YR sandbox (sandbox_id=%s): %s",
+                    sandbox_id or "n/a", exc,
+                )
+                if sandbox is not None:
+                    try:
+                        await asyncio.to_thread(sandbox.kill)
+                    except Exception:
+                        pass
+                if retry < max_retries - 1:
+                    sleep_time = min(30, 2 ** retry)
+                    logger.info("Retrying YR sandbox creation in %d seconds...", sleep_time)
+                    await asyncio.sleep(sleep_time)
+
+        raise RuntimeError(f"Failed to create YR sandbox after {max_retries} retries") from last_error
+
+    async def run(self, cmd: str, *, timeout: int = 600) -> CommandResult:
+        """Execute *cmd* inside the OpenYuanRong sandbox via ``sandbox.commands.run``."""
+        try:
+            result = await asyncio.to_thread(
+                self._sandbox.commands.run, cmd, timeout=timeout,
+            )
+            return CommandResult(
+                stdout=getattr(result, "stdout", ""),
+                stderr=getattr(result, "stderr", ""),
+                exit_code=getattr(result, "exit_code", -1),
+            )
+        except Exception as e:
+            return CommandResult(stdout="", stderr=str(e), exit_code=-1)
+
+    async def cleanup(self) -> None:
+        """Kill the OpenYuanRong sandbox if still running."""
+        if self._sandbox is not None:
+            sandbox_id = getattr(self._sandbox, "sandbox_id", "?")
+            try:
+                if self._sandbox.is_running():
+                    await asyncio.to_thread(self._sandbox.kill)
+                    logger.info("YR sandbox %s killed", sandbox_id)
+                else:
+                    logger.info("YR sandbox %s already stopped", sandbox_id)
+            except Exception as e:
+                logger.warning("Failed to kill YR sandbox %s: %s", sandbox_id, e)
+            self._sandbox = None

From 6de203b8a6a1fe1b07137c6dce8c81e73508267e Mon Sep 17 00:00:00 2001
From: zhaizhiqiang <584508161@qq.com>
Date: Mon, 29 Jun 2026 03:30:36 +0000
Subject: [PATCH 3/3] refactor: restructure blackbox recipes, drop claude_code
 (9235bf603757c85c41deef22fe93490a1b5f0921)

---
 .../claude_code/Dockerfile.claude-code-tool   |  21 -
 .../claude_code/claude_code_runner.py         | 232 ---------
 .../claude_code/config/claude_code.yaml       |   1 -
 .../blackbox_recipes/mini_swe_agent/README.md | 248 ++-------
 .../mini_swe_agent/build_tool.sh              |  56 ++
 .../mini_swe_agent/config/agent_config.yaml   |  36 --
 .../config/agent_config_openyuanrong.yaml     |  37 --
 .../mini_swe_agent/config/parallel_infer.yaml |  31 --
 .../config/swe_agent_blackbox.yaml            | 123 -----
 .../swe_agent_blackbox_megatron_sync.yaml     | 129 -----
 ...ml => swe_agent_blackbox_megatron_v1.yaml} |  70 +--
 .../mini_swe_agent/dataset.py                 |   1 -
 .../mini_swe_agent/framework.py               | 105 ----
 .../mini_swe_agent/mini_swe_agent_runner.py   |  69 ++-
 .../mini_swe_agent/parallel_infer.py          | 486 ++++++++----------
 .../blackbox_recipes/mini_swe_agent/reward.py |   2 +-
 .../mini_swe_agent/run_agent.py               |  34 +-
 .../mini_swe_agent/run_infer.sh               |  80 +++
 .../mini_swe_agent/run_train.sh               | 300 +++++++++++
 .../mini_swe_agent/subprocess_runner.py       |  61 ---
 .../{sandbox/sandbox.py => sandbox_client.py} |  85 +--
 .../blackbox_recipes/scripts/build_tool.sh    |  75 ---
 .../blackbox_recipes/scripts/run_infer.sh     |  66 ---
 .../blackbox_recipes/scripts/run_train.sh     | 122 -----
 .../scripts/run_train_megatron_async.sh       | 199 -------
 .../scripts/run_train_megatron_sync.sh        | 138 -----
 .../r2e_gym_subset_filtered.py                |   7 +
 .../data_preprocess/swe_bench_verified.py     |   9 +
 examples/data_preprocess/swe_rebench.py       |   8 +
 uni_agent/gateway/session/codec.py            |   4 +-
 uni_agent/interaction/model.py                |   4 +-
 verl                                          |   2 +-
 32 files changed, 881 insertions(+), 1960 deletions(-)
 delete mode 100644 examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
 delete mode 100644 examples/blackbox_recipes/claude_code/claude_code_runner.py
 delete mode 100644 examples/blackbox_recipes/claude_code/config/claude_code.yaml
 create mode 100755 examples/blackbox_recipes/mini_swe_agent/build_tool.sh
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
 rename examples/blackbox_recipes/mini_swe_agent/config/{swe_agent_blackbox_megatron_async.yaml => swe_agent_blackbox_megatron_v1.yaml} (62%)
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/framework.py
 create mode 100755 examples/blackbox_recipes/mini_swe_agent/run_infer.sh
 create mode 100755 examples/blackbox_recipes/mini_swe_agent/run_train.sh
 delete mode 100644 examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
 rename examples/blackbox_recipes/{sandbox/sandbox.py => sandbox_client.py} (66%)
 delete mode 100755 examples/blackbox_recipes/scripts/build_tool.sh
 delete mode 100755 examples/blackbox_recipes/scripts/run_infer.sh
 delete mode 100755 examples/blackbox_recipes/scripts/run_train.sh
 delete mode 100755 examples/blackbox_recipes/scripts/run_train_megatron_async.sh
 delete mode 100755 examples/blackbox_recipes/scripts/run_train_megatron_sync.sh

diff --git a/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool b/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
deleted file mode 100644
index 3d12af4c..00000000
--- a/examples/blackbox_recipes/claude_code/Dockerfile.claude-code-tool
+++ /dev/null
@@ -1,21 +0,0 @@
-# Claude Code sidecar tool image.
-#
-# Mounted at /opt/claude-code inside the SWE-bench sandbox.
-
-FROM node:20-bookworm-slim AS builder
-
-ARG TOOL_VERSION="latest"
-ARG NPM_REGISTRY=""
-
-ENV DISABLE_AUTOUPDATER=1 \
-    IS_SANDBOX=1 \
-    npm_config_audit=false \
-    npm_config_fund=false \
-    npm_config_update_notifier=false
-
-RUN if [ -n "${NPM_REGISTRY}" ]; then npm config set registry "${NPM_REGISTRY}"; fi \
-    && npm install -g --prefix /opt/claude-code "@anthropic-ai/claude-code@${TOOL_VERSION}" \
-    && /opt/claude-code/bin/claude --version
-
-FROM scratch
-COPY --from=builder /opt/claude-code /
diff --git a/examples/blackbox_recipes/claude_code/claude_code_runner.py b/examples/blackbox_recipes/claude_code/claude_code_runner.py
deleted file mode 100644
index bee41aaf..00000000
--- a/examples/blackbox_recipes/claude_code/claude_code_runner.py
+++ /dev/null
@@ -1,232 +0,0 @@
-"""Claude Code runner for the blackbox SWE-agent recipe."""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import shlex
-import time
-
-from uni_agent.trainer.framework.types import SessionHandle, SessionRuntime
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_TOOL_IMAGE = "claude-code-tool:latest"
-TOOL_TARGET = "/opt/claude-code"
-
-
-def extract_task(raw_prompt) -> str:
-    if isinstance(raw_prompt, str):
-        return raw_prompt
-    return next(
-        (m["content"] for m in raw_prompt if isinstance(m, dict) and m.get("role") == "user"),
-        str(raw_prompt),
-    )
-
-
-def _extract_issue_text(task: str) -> str:
-    start = task.find("<issue_description>")
-    end = task.find("</issue_description>")
-    if start >= 0 and end > start:
-        return task[start + len("<issue_description>"):end].strip()
-    marker = "\nFollow these steps to resolve the issue:"
-    if marker in task:
-        return task.split(marker, 1)[0].strip()
-    return task.strip()
-
-
-def _decode_metadata_list(value) -> list[str]:
-    if not value:
-        return []
-    if isinstance(value, list):
-        return [str(item) for item in value]
-    if isinstance(value, str):
-        try:
-            parsed = json.loads(value)
-        except json.JSONDecodeError:
-            return [value]
-        if isinstance(parsed, list):
-            return [str(item) for item in parsed]
-    return [str(value)]
-
-
-def build_claude_task(raw_prompt, tools_kwargs: dict | None = None) -> str:
-    tools_kwargs = tools_kwargs or {}
-    task = extract_task(raw_prompt)
-    metadata = ((tools_kwargs.get("reward") or {}).get("metadata") or {})
-    issue = metadata.get("problem_statement") or _extract_issue_text(task)
-    tests = _decode_metadata_list(metadata.get("FAIL_TO_PASS"))
-    if not tests:
-        tests = _decode_metadata_list(metadata.get("PASS_TO_PASS"))[:3]
-    tests_block = "\n".join(f"- {test}" for test in tests) if tests else "- Run the closest relevant tests you identify."
-
-    return (
-        "You are fixing a SWE-bench task in /testbed.\n\n"
-        "Issue:\n"
-        f"{issue}\n\n"
-        "Rules:\n"
-        "- Edit source files only. Do not modify tests.\n"
-        "- The development environment is already installed; do not install packages unless a test command proves it is necessary.\n"
-        "- There is no submit tool in this environment. Do not try to submit.\n"
-        "- Do not create extra edge-case test files after the relevant tests pass.\n"
-        "- Do not run `pytest --collect-only`, `git log`, or any other command that does not directly validate the fix.\n"
-        "- Do not analyze unrelated `is_separable` behavior.\n"
-        "- Do not run additional ad-hoc verification after the listed relevant pytest command passes.\n"
-        "- Do not commit.\n"
-        "- After the minimal fix is applied and a relevant pytest command passes, print a one-line summary and exit immediately.\n\n"
-        "Relevant tests to run after the fix:\n"
-        f"{tests_block}\n"
-    )
-
-
-def build_claude_command(
-    *,
-    task: str,
-    base_url: str,
-    max_turns: int,
-    model: str = "default",
-    permission_mode: str = "bypassPermissions",
-    conda_env: str | None = "testbed",
-    disable_web_tools: bool = True,
-    disable_slash_commands: bool = True,
-) -> str:
-    env = {
-        "ANTHROPIC_BASE_URL": base_url,
-        "ANTHROPIC_API_KEY": "not-needed",
-        "ANTHROPIC_MODEL": model,
-        "ANTHROPIC_DEFAULT_HAIKU_MODEL": model,
-        "ANTHROPIC_DEFAULT_SONNET_MODEL": model,
-        "ANTHROPIC_DEFAULT_OPUS_MODEL": model,
-        "ANTHROPIC_SMALL_FAST_MODEL": model,
-        "CLAUDE_CODE_DISABLE_BACKGROUND_TASKS": "1",
-        "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
-        "CLAUDE_CODE_FORK_SUBAGENT": "0",
-        "CLAUDE_CODE_SUBAGENT_MODEL": model,
-        "DISABLE_AUTOUPDATER": "1",
-        "IS_SANDBOX": "1",
-    }
-    env_assignments = [f"{key}={shlex.quote(value)}" for key, value in env.items()]
-    if conda_env:
-        conda_prefix = f"/opt/miniconda3/envs/{conda_env}"
-        env_assignments.extend(
-            [
-                f"CONDA_DEFAULT_ENV={shlex.quote(conda_env)}",
-                f"CONDA_PREFIX={shlex.quote(conda_prefix)}",
-                f"PATH={shlex.quote(conda_prefix + '/bin')}:/opt/miniconda3/bin:$PATH",
-            ]
-        )
-    env_prefix = " ".join(env_assignments)
-    argv = [
-        "/opt/claude-code/bin/claude",
-        "-p",
-        task,
-        "--model",
-        model,
-        "--max-turns",
-        str(max_turns),
-        "--permission-mode",
-        permission_mode,
-    ]
-    if disable_slash_commands:
-        argv.append("--disable-slash-commands")
-    if disable_web_tools:
-        argv.extend(["--disallowedTools", "Agent", "Task", "WebFetch", "WebSearch"])
-    return (
-        "unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy NO_PROXY no_proxy; "
-        "cd /testbed; "
-        f"{env_prefix} "
-        + shlex.join(argv)
-    )
-
-
-async def _create_claude_sandbox(
-    *,
-    image: str,
-    sidecar_image: str,
-    gateway_url: str,
-):
-    from examples.swe_agent_blackbox.sandbox import YRSandbox, extract_upstream
-
-    upstream = extract_upstream(gateway_url) if gateway_url else ""
-    return await YRSandbox.create(
-        image=image,
-        sidecar_image=sidecar_image,
-        sidecar_target=TOOL_TARGET,
-        upstream=upstream,
-    )
-
-
-async def claude_code_runner(
-    *,
-    raw_prompt,
-    session: SessionHandle,
-    sample_index: int,
-    session_runtime: SessionRuntime,
-    tools_kwargs: dict | None = None,
-    tool_image: str = DEFAULT_TOOL_IMAGE,
-    run_timeout: int = 7200,
-    **kwargs,
-) -> None:
-    from examples.swe_agent_blackbox.dataset import extract_image
-    from examples.swe_agent_blackbox.mini_swe_agent_runner import SandboxEnvForReward
-    from examples.swe_agent_blackbox.reward import build_reward_context, evaluate_in_env
-
-    tools_kwargs = tools_kwargs or {}
-    task = build_claude_task(raw_prompt, tools_kwargs)
-    env_config = tools_kwargs.get("env", {})
-    image = extract_image(env_config)
-    if not image:
-        raise ValueError(f"No Docker image found in tools_kwargs.env for sample {sample_index}")
-
-    gateway_url = session.base_url
-    if not gateway_url:
-        raise ValueError(f"gateway_url is empty for sample {sample_index}")
-
-    sandbox = await _create_claude_sandbox(
-        image=image,
-        sidecar_image=tool_image,
-        gateway_url=gateway_url,
-    )
-
-    try:
-        post_setup_cmd = env_config.get("post_setup_cmd", "")
-        if post_setup_cmd:
-            setup_result = await sandbox.run(post_setup_cmd, timeout=120)
-            if setup_result.exit_code != 0:
-                logger.warning("post_setup_cmd failed rc=%s: %.300s", setup_result.exit_code, setup_result.stdout + setup_result.stderr)
-
-        from examples.swe_agent_blackbox.sandbox import rewrite_gateway_url
-
-        claude_base_url = rewrite_gateway_url(gateway_url, strip_v1=True)
-        max_turns = int(os.environ.get("SWE_AGENT_MAX_TURNS", "100"))
-        agent_cmd = build_claude_command(
-            task=task,
-            base_url=claude_base_url,
-            max_turns=max_turns,
-        )
-
-        started_at = time.perf_counter()
-        result = await sandbox.run(agent_cmd, timeout=int(run_timeout))
-        elapsed = time.perf_counter() - started_at
-        logger.info("[sample %d] claude-code finished rc=%s elapsed=%.1fs", sample_index, result.exit_code, elapsed)
-        if result.exit_code != 0:
-            logger.warning(
-                "[sample %d] claude-code failed stdout_tail=%r stderr_tail=%r",
-                sample_index,
-                (result.stdout or "")[-4000:],
-                (result.stderr or "")[-4000:],
-            )
-
-        metadata, eval_timeout = build_reward_context(tools_kwargs)
-        score, eval_result = await evaluate_in_env(SandboxEnvForReward(sandbox), metadata, eval_timeout)
-        logger.info("[sample %d] reward done score=%s resolved=%s", sample_index, score, eval_result.get("resolved"))
-
-        reward_info = {
-            "reward_score": score,
-            "claude_code_exit_code": result.exit_code,
-            **eval_result,
-        }
-        await session_runtime.complete_session(session.session_id, reward_info=reward_info)
-    finally:
-        await sandbox.cleanup()
diff --git a/examples/blackbox_recipes/claude_code/config/claude_code.yaml b/examples/blackbox_recipes/claude_code/config/claude_code.yaml
deleted file mode 100644
index 503fa1da..00000000
--- a/examples/blackbox_recipes/claude_code/config/claude_code.yaml
+++ /dev/null
@@ -1 +0,0 @@
-#TODO
\ No newline at end of file
diff --git a/examples/blackbox_recipes/mini_swe_agent/README.md b/examples/blackbox_recipes/mini_swe_agent/README.md
index b32a637a..436d3c65 100644
--- a/examples/blackbox_recipes/mini_swe_agent/README.md
+++ b/examples/blackbox_recipes/mini_swe_agent/README.md
@@ -2,268 +2,118 @@
 
 ## Overview
 
-`mini_swe` and `claude_code` both run inside the SWE-bench sandbox through a
-sidecar tool image. The external runner creates the sandbox, mounts the selected
-tool image, starts the agent process, and evaluates the reward in the same
-sandbox.
-
-For `mini_swe`, the agent executes commands through `LocalEnvironment` (local
-bash) inside the sandbox and calls the LLM through the gateway URL passed in via
-stdin. For `claude_code`, the runner starts the Claude Code CLI from the sidecar
-image and points it at the same Anthropic-compatible gateway.
-
-The `mini_swe` tool image uses
+`mini-swe-agent` runs inside the SWE-bench sandbox through a sidecar tool image.
+The external runner creates the sandbox, mounts the tool image at
+`/opt/mini-swe-agent`, starts the agent process, and evaluates the reward in the
+same sandbox.
+
+The agent executes commands through `LocalEnvironment` (local bash) inside the
+sandbox and calls the LLM through the gateway URL passed in via stdin. The
+`mini_swe` tool image uses
 [python-build-standalone](https://github.com/astral-sh/python-build-standalone)
-to build an isolated Python environment. The Claude Code tool image uses a Node
-builder to install the Claude Code npm package. Both images use a minimal
+to build an isolated Python environment, then copies the result into a minimal
 `FROM scratch` final stage, so the sandbox base image does not need to provide
-Python, Node, or npm for the sidecar tool runtime.
+Python for the sidecar tool runtime.
+
+**This recipe is self-contained.** It shares only
+[`../sandbox_client.py`](../sandbox_client.py) with the claude-code recipe;
+everything else (`dataset.py`, `reward.py`, `run_agent.py`, `build_tool.sh`,
+`run_train.sh`, config) lives in this directory and does not depend on
+`claude_code/`.
 
 **Supported runners:**
 
 | runner | Description |
 |--------|-------------|
-| `uniagent` | Original SWE-agent runner |
 | `mini_swe` | mini-swe-agent sidecar runner |
-| `claude_code` | Claude Code sidecar runner; reward is returned through `complete_session(reward_info)` without writing a separate reward JSON file |
 
 **Supported sandbox types:**
 
 | Type | Description |
 |------|-------------|
-| OpenYuanRong (`"openyuanrong"`) | Uses `akernel_sdk.Mount` and `sandbox.commands.run()` |
-
-At runtime, the selected runner depends directly on its tool image. The tool
-image does not need to be extracted into a host directory ahead of time.
+| openyuanrong | Uses `akernel_sdk.Mount` and `sandbox.commands.run()` |
 
 ## Architecture
 
 ```text
-[Rollouter Host: mini_swe_agent_runner / claude_code_runner]
+[Rollouter Host: mini_swe_agent_runner]
   |
-  |-- _create_sandbox(image, sidecar_image)
-  |     `-- openyuanrong: Sandbox(mounts=[Mount(target="/opt/<tool>", ...)])
+  |-- SandboxClient.create(image, sidecar_image, sidecar_target="/opt/mini-swe-agent")
+  |     `-- akernel: Sandbox(mounts=[Mount(target="/opt/mini-swe-agent", ...)])
   |
   |-- sandbox.run("<tool entrypoint>")
   |     `-- [Inside Sandbox]
-  |           /opt/mini-swe-agent/bin/python3.12 or /opt/claude-code/bin/claude
+  |           /opt/mini-swe-agent/bin/python /opt/mini-swe-agent/bin/run_agent.py
   |           stdin <- task config JSON (task, gateway_url, agent)
   |           commands run inside the SWE-bench sandbox
-  |           stdout -> runner-specific execution result
+  |           stdout -> agent execution result JSON
   |
   |-- parse agent result
   |-- SandboxEnvForReward(sandbox) -> evaluate_in_env()
-  `-- session_runtime.complete_session(reward_info)
+  `-- POST session.reward_info_url
 ```
 
 ## Prerequisites
 
-1. **OpenYuanRong** - set `OPENYUANRONG_SERVER_ADDRESS` and `OPENYUANRONG_TOKEN`.
-2. **Runner tool image** - build the selected tool image and push it to a remote
+1. **AKernel** — set `AKERNEL_SERVER_ADDRESS` and `AKERNEL_TOKEN`.
+2. **Tool image** — build the mini-swe-agent tool image and push it to a remote
    registry if the sandbox service cannot access local Docker images.
 
 ## 1. Build Tool Image
 
-`mini_swe` and `claude_code` are both injected into the SWE-bench sandbox as
-sidecar tool images, but they differ in image contents, mount paths, and
-accelerator/mirror options. Use `build_tool.sh` for both runners, and select the
-target runner with `--tool` or `TOOL_KIND`.
+`mini_swe` is injected into the SWE-bench sandbox as a sidecar tool image. Use
+`build_tool.sh` to build it.
 
-| runner | Default tool image | Dockerfile | Sandbox mount path | Image contents | Mirror option |
-|--------|--------------------|------------|--------------------|----------------|---------------|
-| `mini_swe` | `mini-swe-agent-tool:latest` | `Dockerfile.mini-swe-agent-tool` | `/opt/mini-swe-agent` | Standalone Python 3.12, `mini-swe-agent`, `litellm`, and `run_agent.py` | `--pip-index` / `PIP_INDEX_URL` |
-| `claude_code` | `claude-code-tool:latest` | `Dockerfile.claude-code-tool` | `/opt/claude-code` | Claude Code npm package installed by a Node 20 builder | `--npm-registry` / `NPM_REGISTRY` |
-
-### mini_swe Tool Image
-
-`mini_swe` is the default build target:
+| Default tool image | Dockerfile | Sandbox mount path | Image contents |
+|--------------------|------------|--------------------|----------------|
+| `mini-swe-agent-tool:latest` | `Dockerfile.mini-swe-agent-tool` | `/opt/mini-swe-agent` | Standalone Python 3.12, `mini-swe-agent`, `litellm`, and `run_agent.py` |
 
 ```bash
 # Use the default PyPI source.
-bash examples/swe_agent_blackbox/build_tool.sh
+bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh
 
 # Use a custom PyPI mirror.
-bash examples/swe_agent_blackbox/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
+bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
 
 # Build and push to a remote registry.
-bash examples/swe_agent_blackbox/build_tool.sh --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
-```
-
-The `mini_swe` image uses `python-build-standalone` to build an isolated Python
-runtime. The final `FROM scratch` image contains only the files needed under
-`/opt/mini-swe-agent`, and it does not depend on the Python version installed in
-the sandbox base image.
-
-After pushing the image, point runtime inference at it with `SWE_AGENT_TOOL_IMAGE`:
-
-```bash
-SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
-RUNNER=mini_swe \
-bash examples/swe_agent_blackbox/scripts/run_infer.sh
-```
-
-### Claude Code Tool Image
-
-Claude Code must be selected explicitly with `--tool claude_code`:
-
-```bash
-# Use the default npm registry.
-bash examples/swe_agent_blackbox/build_tool.sh --tool claude_code
-
-# Use a custom npm registry.
-bash examples/swe_agent_blackbox/build_tool.sh \
-    --tool claude_code \
-    --npm-registry https://registry.npmmirror.com
-
-# Select the Claude Code npm package version.
-bash examples/swe_agent_blackbox/build_tool.sh \
-    --tool claude_code \
-    --tool-version latest
-
-# Build and push the Claude Code sidecar image.
-bash examples/swe_agent_blackbox/build_tool.sh \
-    --tool claude_code \
-    --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
-```
-
-The Claude Code image uses `node:20-bookworm-slim` as the builder stage and
-installs `@anthropic-ai/claude-code` into `/opt/claude-code`. The final image is
-also a `FROM scratch` sidecar image. At runtime, the runner mounts it into the
-sandbox at `/opt/claude-code` and invokes `/opt/claude-code/bin/claude`.
-
-After pushing the image, point runtime inference at it with `SWE_AGENT_TOOL_IMAGE`:
-
-```bash
-SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/claude-code-tool:latest \
-RUNNER=claude_code \
-bash examples/swe_agent_blackbox/scripts/run_infer.sh
-```
-
-### Combined Build Options
-
-`--tool`, image tags, mirrors, and registries can be combined:
-
-```bash
-bash examples/swe_agent_blackbox/build_tool.sh \
-    --tool mini_swe \
-    --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/ \
-    --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
+bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
 ```
 
-The build script:
-
-1. Selects the Dockerfile and default image name from `--tool`:
-   - `mini_swe` -> `mini-swe-agent-tool:latest`
-   - `claude_code` -> `claude-code-tool:latest`
-2. Tags and pushes the image when `--registry` is provided.
-
-Both tool images are sidecar runtime dependencies, not SWE-bench task base
-images. The `mini_swe` Python runtime is fully isolated from the sandbox
-container's Python. The `claude_code` Node/npm dependencies live only under
-`/opt/claude-code`, so the sandbox base image does not need Node installed.
+The `mini_swe` Python runtime is fully isolated from the sandbox container's
+Python.
 
 ### Build Environment Variables
 
 | Variable | Default | Description |
 |----------|---------|-------------|
-| `TOOL_IMAGE` | `mini-swe-agent-tool` / `claude-code-tool` | Image name; the default changes with `TOOL_KIND` |
+| `TOOL_IMAGE` | `mini-swe-agent-tool` | Image name |
 | `TOOL_TAG` | `latest` | Image tag |
-| `TOOL_VERSION` | `latest` | Tool package version; for `claude_code`, this selects the `@anthropic-ai/claude-code` npm package version |
-| `PIP_INDEX_URL` | unset, use PyPI | pip index URL; equivalent to `--pip-index` |
-| `TOOL_KIND` | `mini_swe` | Tool kind: `mini_swe` or `claude_code` |
-| `NPM_REGISTRY` | unset, use npm default | npm registry URL; equivalent to `--npm-registry` |
+| `PIP_INDEX_URL` | unset, use PyPI | pip index URL (`--pip-index`) |
 
-## 2. Inference With OpenYuanRong Sandbox
+After pushing the image, set `SWE_AGENT_TOOL_IMAGE` to its full registry path so training uses it.
 
-### Using run_infer.sh
+## 2. Training (Fully Async)
 
 ```bash
-cd "$(git rev-parse --show-toplevel)"
-
-RUNNER=mini_swe \
+AKERNEL_SERVER_ADDRESS="6.2.179.37:8888" \
+AKERNEL_TOKEN="<token>" \
 SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
-MODEL_PATH=$HOME/models/Qwen3.5-9B \
-DATA_PATH=$HOME/data/swe_agent/r2e_gym.parquet \
-MAX_SAMPLES=1 \
-TP=1 \
-bash examples/swe_agent_blackbox/scripts/run_infer.sh
-```
-
-### Calling Python Directly
-
-```bash
-python examples/swe_agent_blackbox/parallel_infer.py \
-    --model-path ~/models/Qwen3.5-9B \
-    --data-path ~/data/swe_agent/r2e_gym.parquet \
-    --max-samples 1 \
-    --runner mini_swe \
-    --max-turns 100 \
-    --tensor-parallel-size 1
-```
-
-## 3. Inference
-
-### Environment Variables
-
-```bash
-export OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888"
-export OPENYUANRONG_TOKEN="<your-token>"
-export DEPLOYMENT=openyuanrong
-```
-
-### Run mini_swe
-
-```bash
-RUNNER=mini_swe \
-OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
-OPENYUANRONG_TOKEN="<token>" \
-DEPLOYMENT=openyuanrong \
-SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest \
-bash examples/swe_agent_blackbox/scripts/run_infer.sh
-```
-
-### Run Claude Code
-
-```bash
-RUNNER=claude_code \
-OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
-OPENYUANRONG_TOKEN="<token>" \
-DEPLOYMENT=openyuanrong \
-SWE_AGENT_TOOL_IMAGE=swr.cn-east-3.myhuaweicloud.com/openyuanrong/claude-code-tool:latest \
-SWE_AGENT_MAX_TURNS=50 \
-SWE_AGENT_RUN_TIMEOUT=7200 \
-bash examples/swe_agent_blackbox/scripts/run_infer.sh
-```
-
-## 4. Training (Fully Async)
-
-```bash
-OPENYUANRONG_SERVER_ADDRESS="6.2.179.37:8888" \
-OPENYUANRONG_TOKEN="<token>" \
 MODEL_PATH=~/models/Qwen3.5-9B \
-bash examples/swe_agent_blackbox/scripts/run_train_megatron_async.sh
+bash examples/blackbox_recipes/mini_swe_agent/run_train.sh
 ```
 
-The training YAML keeps `mini_swe` as the default runner:
+The training YAML configures `mini_swe` as its only runner:
 
 ```yaml
-agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
-```
-
-To run training with Claude Code, keep the YAML unchanged and override the runner
-FQN from the launch command:
-
-```bash
-python3 -m verl.experimental.fully_async_policy.fully_async_main \
-  --config-path examples/swe_agent_blackbox/config \
-  --config-name swe_agent_blackbox_megatron_async \
-  actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=examples.swe_agent_blackbox.claude_code_runner.claude_code_runner
+agent_runner_fqn: examples.blackbox_recipes.mini_swe_agent.mini_swe_agent_runner.mini_swe_agent_runner
 ```
 
-## 5. Configuration
+## 3. Configuration
 
 | Variable | Default | Description |
 |----------|---------|-------------|
-| `SWE_AGENT_MAX_TURNS` | `100` | Max agent steps |
+| `AGENT_MAX_TURNS` | `100` | Agent turn budget; the runner reads this env var and uses it as mini-swe-agent's `step_limit` |
+| `SWE_AGENT_EVAL_TIMEOUT` | `600` | Reward evaluation timeout (seconds) |
+| `SWE_AGENT_RUN_TIMEOUT` | `7200` | Max wall time for the agent process in the sandbox |
 | `SWE_AGENT_TOOL_IMAGE` | `swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest` | Sidecar tool image |
-| `DEBUG_MODE` | (unset) | Set to 1 to enable debug logging |
+| `CONDA_ENV` | `testbed` | Conda env activated inside the sandbox before running the agent |
diff --git a/examples/blackbox_recipes/mini_swe_agent/build_tool.sh b/examples/blackbox_recipes/mini_swe_agent/build_tool.sh
new file mode 100755
index 00000000..7bcdbe1f
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/build_tool.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Build the mini-swe-agent sidecar tool image.
+#
+# The image uses python-build-standalone to build an isolated Python runtime
+# with mini-swe-agent + litellm + run_agent.py, copied into a minimal
+# `FROM scratch` final stage rooted at /opt/mini-swe-agent. It is mounted into
+# the SWE-bench sandbox at /opt/mini-swe-agent, so the sandbox base image does
+# not need Python for the sidecar tool runtime.
+#
+# Usage:
+#   bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh
+#   bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
+#   bash examples/blackbox_recipes/mini_swe_agent/build_tool.sh --registry swr.cn-east-3.myhuaweicloud.com/openyuanrong
+# Environment overrides: TOOL_IMAGE (image name), TOOL_TAG (image tag), PIP_INDEX_URL (same as --pip-index).
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+IMAGE_NAME="${TOOL_IMAGE:-mini-swe-agent-tool}"
+IMAGE_TAG="${TOOL_TAG:-latest}"
+
+# Parse args; a flag given without its value aborts with an explicit message.
+REGISTRY=""
+PIP_INDEX_URL="${PIP_INDEX_URL:-}"
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --registry) REGISTRY="${2:?--registry requires a value}"; shift 2 ;;
+        --pip-index) PIP_INDEX_URL="${2:?--pip-index requires a value}"; shift 2 ;;
+        *) echo "Unknown arg: $1" >&2; exit 1 ;;
+    esac
+done
+
+BUILD_ARGS=()  # stays empty without a pip index; expanded below so `set -u` on bash < 4.4 does not abort
+if [[ -n "${PIP_INDEX_URL}" ]]; then
+    BUILD_ARGS+=(--build-arg PIP_INDEX_URL="${PIP_INDEX_URL}")
+fi
+
+echo "==> Building mini_swe tool image: ${IMAGE_NAME}:${IMAGE_TAG}"
+docker build \
+    -f "${SCRIPT_DIR}/Dockerfile.mini-swe-agent-tool" \
+    -t "${IMAGE_NAME}:${IMAGE_TAG}" \
+    ${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"} \
+    "${SCRIPT_DIR}/"
+
+if [[ -n "${REGISTRY}" ]]; then
+    FULL_TAG="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
+    echo "==> Tagging and pushing: ${FULL_TAG}"
+    docker tag "${IMAGE_NAME}:${IMAGE_TAG}" "${FULL_TAG}"
+    docker push "${FULL_TAG}"
+    echo "    Pushed."
+fi
+
+echo ""
+echo "Tool image ready: ${IMAGE_NAME}:${IMAGE_TAG}"
+if [[ -n "${REGISTRY}" ]]; then
+    echo "  Remote sandbox: ${FULL_TAG}"
+fi
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml b/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
deleted file mode 100644
index b7352b72..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/config/agent_config.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-- name: swe_agent
-
-  _target_: uni_agent.agent_loop.UniAgentLoop
-  concurrency: 64
-  log_dir: /tmp/swebench_qwen3_coder
-  mask_abnormal_exit_traj: false
-
-  interaction:
-    action_timeout: 300
-    max_turns: 100
-
-  env:
-    deployment:
-      type: local
-      command: /usr/bin/python3 -m swerex.server --auth-token {token}
-      timeout: 600
-      startup_timeout: 600
-      container_runtime: docker
-    env_variables:
-      PIP_PROGRESS_BAR: "off"
-      PIP_CACHE_DIR: "~/.cache/pip"
-      PAGER: "cat"
-      MANPAGER: "cat"
-      LESS: "-R"
-      TQDM_DISABLE: "1"
-      GIT_PAGER: "cat"
-
-  tool_parser: qwen3_coder
-
-  tools:
-    - name: str_replace_editor
-    - name: execute_bash
-    - name: submit
-
-  reward:
-    eval_timeout: 600
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml b/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
deleted file mode 100644
index b298c676..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/config/agent_config_openyuanrong.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-- name: swe_agent
-
-  _target_: uni_agent.agent_loop.UniAgentLoop
-  concurrency: 64
-  log_dir: /tmp/swebench_qwen3_coder
-  mask_abnormal_exit_traj: false
-
-  interaction:
-    action_timeout: 300
-    max_turns: 100
-
-  env:
-    deployment:
-      type: openyuanrong
-      command: /opt/swe-rex/bin/python /opt/swe-rex/bin/swerex-remote --host 0.0.0.0 --port {port} --auth-token {token}
-      timeout: 600
-      startup_timeout: 600
-      swerex_runtime_image: swr.cn-east-3.myhuaweicloud.com/openyuanrong/swerex-runtime:1.4.0
-      swerex_runtime_target: /opt/swe-rex
-    env_variables:
-      PIP_PROGRESS_BAR: "off"
-      PIP_CACHE_DIR: "~/.cache/pip"
-      PAGER: "cat"
-      MANPAGER: "cat"
-      LESS: "-R"
-      TQDM_DISABLE: "1"
-      GIT_PAGER: "cat"
-
-  tool_parser: qwen3_coder
-
-  tools:
-    - name: str_replace_editor
-    - name: execute_bash
-    - name: submit
-
-  reward:
-    eval_timeout: 600
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml b/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
deleted file mode 100644
index 0829fdcd..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/config/parallel_infer.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-# Parallel inference config for the blackbox SWE-agent recipe.
-# Composes verl's base configs with inference-specific overrides.
-
-defaults:
-  - model_engine: dp
-  - actor@actor_rollout_ref.actor: ${model_engine}_actor
-  - rollout@actor_rollout_ref.rollout: rollout
-  - model@actor_rollout_ref.model: hf_model
-  - reward: reward
-  - _self_
-
-hydra:
-  searchpath:
-    - pkg://verl.trainer.config
-
-actor_rollout_ref:
-  hybrid_engine: true
-  nccl_timeout: 600
-  model: {}
-  rollout:
-    agent: {}
-
-trainer:
-  nnodes: 1
-  n_gpus_per_node: 8
-  logger:
-    - console
-  device: cuda
-  total_epochs: 1
-  total_training_steps: null
-  balance_batch: false
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
deleted file mode 100644
index 62b73da1..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox.yaml
+++ /dev/null
@@ -1,123 +0,0 @@
-# PPO trainer config for the blackbox SWE-agent recipe (v2).
-# Uses the generic AgentFrameworkRolloutAdapter + SWEAgentFramework subclass.
-
-hydra:
-  searchpath:
-    - pkg://verl.trainer.config
-
-defaults:
-  - ppo_trainer
-  - _self_
-
-actor_rollout_ref:
-  hybrid_engine: true
-  nccl_timeout: 600
-
-  model:
-    path: ???
-    enable_gradient_checkpointing: true
-
-  rollout:
-    name: vllm
-    mode: async
-    prompt_length: 4096
-    response_length: 131072
-    max_model_len: 135168
-    temperature: 1.0
-    top_p: 1.0
-    n: 8
-    tensor_model_parallel_size: 4
-    gpu_memory_utilization: 0.7
-    calculate_log_probs: true
-    enable_sleep_mode: true
-    free_cache_engine: true
-
-    multi_turn:
-      enable: true
-      max_assistant_turns: 1
-      max_parallel_calls: 1
-      format: qwen3_coder
-
-    agent:
-      num_workers: 8
-      agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
-
-    custom:
-      agent_framework:
-        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
-        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
-        gateway_count: 1
-        completion_timeout_seconds: 600
-        max_concurrent_sessions: 32
-        agent_runner_kwargs:
-          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
-
-  actor:
-    use_dynamic_bsz: true
-    ppo_mini_batch_size: 16
-    use_kl_loss: false
-    kl_loss_coef: 0.0
-    clip_ratio_low: 0.2
-    clip_ratio_high: 0.28
-    loss_agg_mode: token-mean
-    optim:
-      lr: 1e-6
-      weight_decay: 0.1
-      clip_grad: 1.0
-    fsdp_config:
-      param_offload: true
-      optimizer_offload: true
-      grad_offload: true
-
-data:
-  train_files: ???
-  val_files: ???
-  max_prompt_length: 4096
-  max_response_length: 131072
-  train_batch_size: 128
-  val_batch_size: 128
-  return_raw_chat: true
-  trust_remote_code: true
-  custom_cls:
-    path: pkg://examples.swe_agent_blackbox.dataset
-    name: SWEBenchDataset
-
-algorithm:
-  gamma: 1.0
-  lam: 1.0
-  adv_estimator: grpo
-  use_kl_in_reward: false
-  kl_ctrl:
-    type: fixed
-    kl_coef: 0.0
-
-reward:
-  custom_reward_function:
-    path: pkg://examples/swe_agent_blackbox.reward
-    name: compute_score
-
-trainer:
-  use_legacy_worker_impl: disable
-  nnodes: 1
-  n_gpus_per_node: 8
-  total_epochs: 10
-  project_name: swe_agent_blackbox
-  experiment_name: swe_agent
-  logger:
-    - console
-  device: cuda
-  balance_batch: false
-  val_before_train: true
-  val_only: false
-  save_freq: 10
-  test_freq: 10
-  default_local_dir: checkpoints/swe_agent_blackbox
-  resume_mode: disable
-
-ray_kwargs:
-  ray_init:
-    runtime_env:
-      env_vars:
-        TRANSFER_QUEUE_ENABLE: ""
-        NCCL_P2P_DISABLE: "1"
-        NCCL_SHM_DISABLE: "1"
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
deleted file mode 100644
index 65b09b1a..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_sync.yaml
+++ /dev/null
@@ -1,129 +0,0 @@
-# Megatron sync training config for the blackbox SWE-agent recipe.
-# Uses main_ppo_sync + Megatron backend, same blackbox infrastructure as FSDP.
-#
-# Entry point: python3 -m verl.trainer.main_ppo_sync
-
-hydra:
-  searchpath:
-    - pkg://verl.trainer.config
-
-defaults:
-  - ppo_megatron_trainer
-  - _self_
-
-actor_rollout_ref:
-  hybrid_engine: true
-  nccl_timeout: 600
-
-  model:
-    path: ???
-    enable_gradient_checkpointing: true
-
-  rollout:
-    name: vllm
-    mode: async
-    prompt_length: 4096
-    response_length: 131072
-    max_model_len: 135168
-    temperature: 1.0
-    top_p: 1.0
-    n: 8
-    tensor_model_parallel_size: 4
-    gpu_memory_utilization: 0.7
-    calculate_log_probs: true
-    enable_sleep_mode: true
-    free_cache_engine: true
-
-    multi_turn:
-      enable: true
-      max_assistant_turns: 1
-      max_parallel_calls: 1
-      format: qwen3_coder
-
-    agent:
-      num_workers: 8
-      agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
-
-    custom:
-      agent_framework:
-        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
-        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
-        gateway_count: 1
-        completion_timeout_seconds: 600
-        max_concurrent_sessions: 32
-        agent_runner_kwargs:
-          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
-
-  actor:
-    use_dynamic_bsz: true
-    ppo_mini_batch_size: 16
-    use_kl_loss: false
-    kl_loss_coef: 0.0
-    clip_ratio_low: 0.2
-    clip_ratio_high: 0.28
-    loss_agg_mode: token-mean
-    optim:
-      lr: 1e-6
-      weight_decay: 0.1
-      clip_grad: 1.0
-    megatron:
-      param_offload: true
-      grad_offload: true
-      optimizer_offload: true
-      tensor_model_parallel_size: 8
-      pipeline_model_parallel_size: 1
-      context_parallel_size: 1
-      use_mbridge: true
-
-data:
-  train_files: ???
-  val_files: ???
-  max_prompt_length: 4096
-  max_response_length: 131072
-  train_batch_size: 128
-  val_batch_size: 128
-  return_raw_chat: true
-  trust_remote_code: true
-  custom_cls:
-    path: pkg://examples.swe_agent_blackbox.dataset
-    name: SWEBenchDataset
-
-algorithm:
-  gamma: 1.0
-  lam: 1.0
-  adv_estimator: grpo
-  use_kl_in_reward: false
-  kl_ctrl:
-    type: fixed
-    kl_coef: 0.0
-
-reward:
-  custom_reward_function:
-    path: pkg://examples.swe_agent_blackbox.reward
-    name: compute_score
-
-trainer:
-  use_legacy_worker_impl: disable
-  nnodes: 1
-  n_gpus_per_node: 8
-  total_epochs: 10
-  project_name: swe_agent_blackbox
-  experiment_name: swe_agent
-  logger:
-    - console
-  device: cuda
-  balance_batch: false
-  val_before_train: true
-  val_only: false
-  save_freq: 10
-  test_freq: 10
-  default_local_dir: checkpoints/swe_agent_blackbox
-  resume_mode: disable
-
-ray_kwargs:
-  ray_init:
-    runtime_env:
-      env_vars:
-        TRANSFER_QUEUE_ENABLE: ""
-        NCCL_P2P_DISABLE: "1"
-        NCCL_SHM_DISABLE: "1"
diff --git a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_v1.yaml
similarity index 62%
rename from examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml
rename to examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_v1.yaml
index d25fcce5..ad2c719d 100644
--- a/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_async.yaml
+++ b/examples/blackbox_recipes/mini_swe_agent/config/swe_agent_blackbox_megatron_v1.yaml
@@ -1,8 +1,8 @@
-# Megatron + TQ fully-async training config for the blackbox SWE-agent recipe.
-# Uses FullyAsyncAgentFrameworkRolloutAdapter + SWEAgentFramework with Megatron backend.
+# Megatron + V1 unified trainer config for the blackbox mini-swe recipe.
 #
-# Entry point: python3 -m verl.experimental.fully_async_policy.fully_async_main
-# Requires: transfer_queue.enable=true (selects TQ path in FullyAsyncTaskRunner)
+# Entry point: python3 -m verl.trainer.main_ppo
+# Default trainer mode is separate_async. On a single 8-GPU node this recipe
+# uses 4 GPUs for the trainer and 4 GPUs for the standalone rollout.
 
 hydra:
   searchpath:
@@ -13,7 +13,7 @@ defaults:
   - _self_
 
 actor_rollout_ref:
-  hybrid_engine: false
+  hybrid_engine: true
   nccl_timeout: 9600
 
   model:
@@ -22,13 +22,16 @@ actor_rollout_ref:
   rollout:
     name: vllm
     mode: async
+    nnodes: 1
+    n_gpus_per_node: 4
     prompt_length: 4096
     response_length: 131072
     max_model_len: 135168
     temperature: 1.0
     top_p: 1.0
+    top_k: -1
     n: 8
-    tensor_model_parallel_size: 2
+    tensor_model_parallel_size: 4
     gpu_memory_utilization: 0.7
     calculate_log_probs: true
     enable_sleep_mode: true
@@ -37,26 +40,29 @@ actor_rollout_ref:
     max_num_batched_tokens: 135168
     checkpoint_engine:
       backend: nccl
+      update_weights_bucket_megabytes: 2048
 
     multi_turn:
       enable: true
-      max_assistant_turns: 1
       max_parallel_calls: 1
       format: qwen3_coder
 
     agent:
       num_workers: 8
-      agent_loop_manager_class: uni_agent.trainer.framework.entry.FullyAsyncAgentFrameworkRolloutAdapter
+      agent_loop_manager_class: uni_agent.framework.entry.AgentFrameworkRolloutAdapter
 
     custom:
       agent_framework:
-        framework_class_fqn: examples.swe_agent_blackbox.framework.SWEAgentFramework
-        agent_runner_fqn: examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner
         gateway_count: 1
-        completion_timeout_seconds: 600
-        max_concurrent_sessions: 32
-        agent_runner_kwargs:
-          agent_config_path: examples/swe_agent_blackbox/config/agent_config.yaml
+        agent_runners:
+          swe_agent:
+            runner_fqn: examples.blackbox_recipes.mini_swe_agent.mini_swe_agent_runner.mini_swe_agent_runner
+            dispatch_mode: ray_task
+            max_concurrent_sessions: 32
+            runner_kwargs:
+              tool_image: swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest
+              run_timeout: 3600
+              conda_env: testbed
 
   actor:
     use_dynamic_bsz: true
@@ -78,16 +84,17 @@ actor_rollout_ref:
       param_offload: true
       grad_offload: true
       optimizer_offload: true
-      tensor_model_parallel_size: 8
+      tensor_model_parallel_size: 4
       pipeline_model_parallel_size: 1
       context_parallel_size: 1
       use_mbridge: true
       use_remove_padding: false
 
   ref:
+    log_prob_micro_batch_size_per_gpu: 1
     megatron:
       param_offload: false
-      tensor_model_parallel_size: 8
+      tensor_model_parallel_size: 4
       pipeline_model_parallel_size: 1
       context_parallel_size: 1
 
@@ -98,12 +105,14 @@ data:
   truncation: left
   max_prompt_length: 4096
   max_response_length: 131072
-  train_batch_size: 0
+  train_batch_size: 1
+  val_batch_size: 1
   gen_batch_size: 1
   return_raw_chat: true
   trust_remote_code: true
+  dataloader_num_workers: 0
   custom_cls:
-    path: pkg://examples.swe_agent_blackbox.dataset
+    path: pkg://examples.blackbox_recipes.mini_swe_agent.dataset
     name: SWEBenchDataset
 
 algorithm:
@@ -119,13 +128,14 @@ algorithm:
 
 reward:
   custom_reward_function:
-    path: pkg://examples.swe_agent_blackbox.reward
+    path: pkg://examples.blackbox_recipes.mini_swe_agent.reward
     name: compute_score
 
 trainer:
   nnodes: 1
-  n_gpus_per_node: 8
+  n_gpus_per_node: 4
   total_epochs: 10
+  total_training_steps: null
   project_name: swe_agent_blackbox
   experiment_name: swe_agent
   logger:
@@ -137,18 +147,14 @@ trainer:
   test_freq: 10
   default_local_dir: checkpoints/swe_agent_blackbox
   resume_mode: auto
-
-rollout:
-  nnodes: 1
-  n_gpus_per_node: 8
-  total_rollout_steps: 100000
-
-async_training:
-  use_trainer_do_validate: false
-  staleness_threshold: 1.0
-  trigger_parameter_sync_step: 4
-  require_batches: 1
-  partial_rollout: true
+  use_v1: true
+  v1:
+    trainer_mode: separate_async
+    colocate_async:
+      num_warmup_batches: 1
+    separate_async:
+      num_warmup_batches: 4
+      parameter_sync_step: 4
 
 transfer_queue:
   enable: true
diff --git a/examples/blackbox_recipes/mini_swe_agent/dataset.py b/examples/blackbox_recipes/mini_swe_agent/dataset.py
index 89d65129..e7781c03 100644
--- a/examples/blackbox_recipes/mini_swe_agent/dataset.py
+++ b/examples/blackbox_recipes/mini_swe_agent/dataset.py
@@ -21,7 +21,6 @@ def extract_image(env_config: dict) -> str:
 
 
 class SWEBenchDataset(RLHFDataset):
-
     def __getitem__(self, item):
         row_dict = super().__getitem__(item)
         extra_info = row_dict.get("extra_info", {})
diff --git a/examples/blackbox_recipes/mini_swe_agent/framework.py b/examples/blackbox_recipes/mini_swe_agent/framework.py
deleted file mode 100644
index 7c5c027c..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/framework.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""SWE-agent specific framework subclass.
-
-Injects reward_info (from agent_runner's complete_session call)
-into sample_fields["extra_info"] so the reward worker's
-compute_score can access it via extra_info.
-
-Overrides _run_session to execute agent_runner in a separate Ray worker
-process, preventing blocking operations from stalling the event loop.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import functools
-import logging
-from dataclasses import replace
-from uuid import uuid4
-
-import ray
-
-from uni_agent.trainer.framework.framework import OpenAICompatibleAgentFramework
-
-from examples.swe_agent_blackbox.subprocess_runner import remote_agent_run
-
-logger = logging.getLogger(__name__)
-
-
-class SWEAgentFramework(OpenAICompatibleAgentFramework):
-
-    async def _score_trajectories(self, session_trajectories, sample_fields):
-        if session_trajectories and session_trajectories[-1].reward_info:
-            reward_info = session_trajectories[-1].reward_info
-            extra_info = dict(sample_fields.get("extra_info") or {})
-            sample_fields = {**sample_fields, "extra_info": {**extra_info, **reward_info}}
-        return await super()._score_trajectories(session_trajectories, sample_fields)
-
-    def _resolve_runner(self) -> tuple[str, dict]:
-        """Extract FQN and pre-bound kwargs from self.agent_runner.
-
-        self.agent_runner may be a functools.partial (from_config wraps it),
-        so we unpack the original function and its keywords.
-        """
-        fn = self.agent_runner
-        kwargs = {}
-        if isinstance(fn, functools.partial):
-            kwargs = dict(fn.keywords)
-            fn = fn.func
-        fqn = f"{fn.__module__}.{fn.__qualname__}"
-        return fqn, kwargs
-
-    async def _run_session(
-        self,
-        *,
-        prompts,
-        raw_prompt,
-        sample_index: int,
-        session_id: str | None = None,
-        runner_kwargs: dict | None = None,
-    ):
-        """Run agent_runner in a Ray worker process instead of in-process."""
-        session_id = session_id or f"session-{sample_index}-0-{uuid4().hex}"
-        sample_fields = self._extract_sample_fields(prompts=prompts, sample_index=sample_index)
-        session = await self.session_runtime.create_session(session_id)
-        agent_runner_fqn, resolved_kwargs = self._resolve_runner()
-
-        try:
-            if runner_kwargs:
-                resolved_kwargs = {**resolved_kwargs, **runner_kwargs}
-
-            ref = remote_agent_run.remote(
-                agent_runner_fqn=agent_runner_fqn,
-                raw_prompt=raw_prompt,
-                session_id=session_id,
-                base_url=session.base_url,
-                sample_index=sample_index,
-                runner_kwargs=resolved_kwargs,
-            )
-            loop = asyncio.get_running_loop()
-            reward_info = await loop.run_in_executor(None, ray.get, ref)
-
-            await self.session_runtime.complete_session(
-                session_id, reward_info=reward_info,
-            )
-            session_trajectories = await self.session_runtime.finalize_session(session_id)
-
-        except Exception as e:
-            logger.error("_run_session failed: session=%s, sample=%d, runner=%s: %s",
-                         session_id, sample_index, agent_runner_fqn, e, exc_info=True)
-            await self.session_runtime.abort_session(session_id)
-            raise
-
-        if not self.reward_loop_worker_handles or not session_trajectories:
-            return session_trajectories, sample_fields
-
-        annotations = await self._score_trajectories(session_trajectories, sample_fields)
-        scored_trajectories = []
-        for traj, (score, extra) in zip(session_trajectories, annotations, strict=True):
-            scored_trajectories.append(
-                replace(
-                    traj,
-                    reward_score=score,
-                    extra_fields={**traj.extra_fields, "reward_extra_info": extra},
-                )
-            )
-        return scored_trajectories, sample_fields
diff --git a/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py b/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
index 33882bc8..2b16099a 100644
--- a/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
+++ b/examples/blackbox_recipes/mini_swe_agent/mini_swe_agent_runner.py
@@ -1,6 +1,6 @@
 """Mini-swe-agent runner for the blackbox SWE-agent recipe.
 
-Agent runs inside a OpenYuanRong remote sandbox via sidecar tool image mount.
+Agent runs inside a remote sandbox via sidecar tool image mount.
 The runner creates the sandbox, pipes task config via stdin, parses
 the result from stdout, and evaluates reward in the same sandbox.
 """
@@ -15,21 +15,24 @@
 import time
 from pathlib import Path
 
-from uni_agent.trainer.framework.types import SessionHandle, SessionRuntime
+import httpx
 
-from examples.swe_agent_blackbox.dataset import extract_image
-from examples.swe_agent_blackbox.reward import build_reward_context, evaluate_in_env
-from examples.swe_agent_blackbox.sandbox import CommandResult, YRSandbox, extract_upstream, rewrite_gateway_url
+from examples.blackbox_recipes.mini_swe_agent.dataset import extract_image
+from examples.blackbox_recipes.mini_swe_agent.reward import build_reward_context, evaluate_in_env
+from examples.blackbox_recipes.sandbox_client import (
+    SandboxClient,
+    extract_upstream,
+    rewrite_gateway_url,
+)
+from uni_agent.gateway.session import SessionHandle
 
 logger = logging.getLogger(__name__)
-if os.environ.get("DEBUG_MODE"):
-    logger.setLevel(logging.DEBUG)
 
 DEFAULT_TOOL_IMAGE = "swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest"
 
 
 class SandboxEnvForReward:
-    """Adapts :class:`YRSandbox` to the async env interface used by
+    """Adapts :class:`SandboxClient` to the async env interface used by
     reward specs (``communicate``, ``write_file``, ``read_file``).
     """
 
@@ -67,7 +70,7 @@ def _build_task_config(
 ) -> dict:
     """Build the task config passed to run_agent.py via stdin."""
     agent_gateway_url = rewrite_gateway_url(gateway_url)
-    step_limit = int(os.environ.get("SWE_AGENT_MAX_TURNS", "100"))
+    step_limit = int(os.environ.get("AGENT_MAX_TURNS", "100"))
     return {
         "task": task,
         "gateway_url": agent_gateway_url,
@@ -84,15 +87,20 @@ def build_agent_command(
 ) -> str:
     """Build the command that runs run_agent.py inside the sandbox."""
     conda_prefix = f"/opt/miniconda3/envs/{conda_env}"
-    env_prefix = (
+    run_agent_env = (
         f"CONDA_DEFAULT_ENV={shlex.quote(conda_env)} "
         f"CONDA_PREFIX={shlex.quote(conda_prefix)} "
-        f"PATH={shlex.quote(conda_prefix + '/bin')}:/opt/miniconda3/bin:$PATH"
+        f"PATH={shlex.quote(conda_prefix + '/bin')}:/opt/miniconda3/bin:$PATH "
+        "PIP_DISABLE_PIP_VERSION_CHECK=1 "
+        "PIP_PROGRESS_BAR=off"
     )
     return (
         "unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy NO_PROXY no_proxy; "
-        f"{env_prefix} "
-        f"echo {config_b64} | base64 -d | "
+        f"env {run_agent_env} sh -c 'echo \"[mini_swe] shell env: CONDA_DEFAULT_ENV=$CONDA_DEFAULT_ENV "
+        'CONDA_PREFIX=$CONDA_PREFIX PATH=$PATH" >&2; '
+        'echo "[mini_swe] python=$(command -v python) pip=$(command -v pip)" >&2\' ; '
+        f"printf %s {shlex.quote(config_b64)} | base64 -d | "
+        f"env {run_agent_env} "
         "/opt/mini-swe-agent/bin/python /opt/mini-swe-agent/bin/run_agent.py"
     )
 
@@ -102,21 +110,21 @@ async def mini_swe_agent_runner(
     raw_prompt,
     session: SessionHandle,
     sample_index: int,
-    session_runtime: SessionRuntime,
     tools_kwargs: dict | None = None,
     tool_image: str = DEFAULT_TOOL_IMAGE,
     run_timeout: int = 7200,
     conda_env: str = "testbed",
+    sandbox_max_retries: int = 10,
     **kwargs,
 ) -> None:
     """Run mini-swe-agent inside a sandbox with sidecar tool mount.
 
     Flow:
-        1. Create OpenYuanRong remote sandbox with mini-swe-agent sidecar
+        1. Create remote sandbox with mini-swe-agent sidecar
         2. Pipe task config to run_agent.py via stdin
         3. Parse agent result from stdout
         4. Evaluate reward in the same sandbox
-        5. Complete session with reward_info
+        5. Post reward_info for the framework reward path
     """
     tools_kwargs = tools_kwargs or {}
     logger.info("mini_swe_agent_runner called, sample_index=%d", sample_index)
@@ -130,14 +138,17 @@ async def mini_swe_agent_runner(
     if not image:
         raise ValueError(f"No sandbox image found in tools_kwargs.env for sample {sample_index}")
 
-    # Gateway URL — extract upstream for OpenYuanRong tunnel
+    # Gateway URL — extract upstream for tunnel
     gateway_url = session.base_url
     if not gateway_url:
         raise ValueError(f"gateway_url is empty for sample {sample_index}")
 
     upstream = extract_upstream(gateway_url)
-    sandbox = await YRSandbox.create(
-        image=image, sidecar_image=tool_image, upstream=upstream,
+    sandbox = await SandboxClient.create(
+        image=image,
+        sidecar_image=tool_image,
+        upstream=upstream,
+        max_retries=int(sandbox_max_retries),
     )
     sandbox_id = sandbox.sandbox_id
     logger.info("Sandbox created (image=%s, sandbox_id=%s)", image, sandbox_id)
@@ -168,14 +179,17 @@ async def mini_swe_agent_runner(
         elapsed = time.perf_counter() - t0
         logger.debug(
             "[sample %d] agent process finished: rc=%d (%.1fs)",
-            sample_index, agent_result.exit_code, elapsed,
+            sample_index,
+            agent_result.exit_code,
+            elapsed,
         )
 
         # Parse agent result from stdout
         agent_info = _parse_agent_result(agent_result.stdout, sample_index)
         logger.info(
             "[sample %d] agent: exit_status=%s, submission=%d chars",
-            sample_index, agent_info.get("exit_status"),
+            sample_index,
+            agent_info.get("exit_status"),
             len(agent_info.get("submission", "")),
         )
 
@@ -186,11 +200,18 @@ async def mini_swe_agent_runner(
         score, eval_result = await evaluate_in_env(reward_env, metadata, eval_timeout)
         logger.debug(
             "[sample %d] reward done: score=%s, resolved=%s (%.1fs)",
-            sample_index, score, eval_result.get("resolved"), time.perf_counter() - t0,
+            sample_index,
+            score,
+            eval_result.get("resolved"),
+            time.perf_counter() - t0,
         )
 
         reward_info = {"reward_score": score, **eval_result}
-        await session_runtime.complete_session(session.session_id, reward_info=reward_info)
+        if not session.reward_info_url:
+            raise ValueError(f"reward_info_url is empty for session {session.session_id}")
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(session.reward_info_url, json={"reward_info": reward_info})
+            response.raise_for_status()
 
     except Exception as e:
         logger.warning("Mini-swe-agent runner failed for sample %d (sandbox_id=%s): %s", sample_index, sandbox_id, e)
@@ -212,7 +233,7 @@ def _parse_agent_result(stdout: str, sample_index: int) -> dict:
     if not stdout:
         return {"exit_status": "error", "submission": ""}
     # Try the last line that looks like JSON first
-    lines = [l.strip() for l in stdout.split("\n") if l.strip()]
+    lines = [ln.strip() for ln in stdout.split("\n") if ln.strip()]
     for line in reversed(lines):
         if line.startswith("{"):
             try:
diff --git a/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py b/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
index c74765e0..cd792cd5 100644
--- a/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
+++ b/examples/blackbox_recipes/mini_swe_agent/parallel_infer.py
@@ -1,11 +1,16 @@
-"""Parallel inference runner for the blackbox SWE-agent recipe (v2).
+"""Standalone inference runner for the blackbox mini-swe-agent recipe.
 
-Creates an LLM server, GatewayServingRuntime, and SWEAgentFramework,
-then runs agent sessions in parallel and reports resolve rate.
+Spins up vLLM + gateway + a reward worker, runs agent sessions in parallel,
+and reports resolve rate. Does NOT start the Megatron trainer.
 
-Usage (CLI):
-    python examples/swe_agent_blackbox/parallel_infer.py \
-        --model-path ~/models/Qwen3-Coder-30B-A3B-Instruct \
+Reuses the recipe's existing training config
+(config/swe_agent_blackbox_megatron_v1.yaml); its megatron/optimizer sections
+are inert here since this driver never builds the actor worker group — only
+the rollout, agent_framework, model, and reward sections are read.
+
+Usage:
+    python examples/blackbox_recipes/mini_swe_agent/parallel_infer.py \
+        --model-path ~/models/Qwen3.5-9B \
         --data-path ~/data/swe_agent/swe_bench_verified.parquet \
         --max-samples 10
 """
@@ -14,32 +19,20 @@
 
 import argparse
 import asyncio
-import json
 import logging
 import os
-from functools import partial
 from typing import Any
 from uuid import uuid4
 
 import numpy as np
 import ray
 
-from verl import DataProto
-from verl.protocol import pad_dataproto_to_divisor
-from verl.utils import hf_tokenizer
-from verl.utils.transferqueue_utils import tq as _tq_mock
+from verl.experimental.reward_loop.reward_loop import RewardLoopWorker
+from verl.utils import tensordict_utils as tu
+from verl.utils.transferqueue_utils import tq
 from verl.workers.rollout.llm_server import LLMServerManager
 
-from uni_agent.trainer.gateway.runtime import GatewayServingRuntime
-
-from examples.swe_agent_blackbox.framework import SWEAgentFramework
-from examples.swe_agent_blackbox.agent_runner import swe_agent_runner
-from examples.swe_agent_blackbox.claude_code_runner import claude_code_runner
-
-try:
-    from examples.swe_agent_blackbox.mini_swe_agent_runner import mini_swe_agent_runner
-except ImportError:
-    mini_swe_agent_runner = None
+from uni_agent.framework.entry import build_agent_framework, build_gateway_manager
 
 logging.basicConfig(
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
@@ -48,9 +41,14 @@
 )
 logger = logging.getLogger(__name__)
 
+# ── Recipe-specific constants (only _CONFIG_NAME and _DEFAULT_TOOL_IMAGE differ between recipes) ──
+_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config")
+_CONFIG_NAME = "swe_agent_blackbox_megatron_v1"
+_DEFAULT_TOOL_IMAGE = "swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest"
+
 
 # =====================================================================
-# Dataset loading (inlined from dataset.py — only used here)
+# Dataset loading (inlined; keeps the driver self-contained)
 # =====================================================================
 
 
@@ -83,7 +81,6 @@ def _remap_sample_images(sample: dict[str, Any]) -> dict[str, Any]:
 
 
 def _inject_reward_fields(sample: dict[str, Any]) -> None:
-    """Inject verl-standard data_source and reward_model from extra_info.tools_kwargs.reward."""
     extra_info = sample.get("extra_info", {})
     tools_kwargs = extra_info.get("tools_kwargs", {})
     reward_config = tools_kwargs.get("reward", {})
@@ -91,334 +88,272 @@ def _inject_reward_fields(sample: dict[str, Any]) -> None:
     sample.setdefault("reward_model", {"ground_truth": {}})
 
 
-def load_swe_dataset(data_path: str | list[str], max_samples: int = -1) -> list[dict[str, Any]]:
+def load_swe_dataset(data_path: str, max_samples: int = -1) -> list[dict[str, Any]]:
     import pyarrow.parquet as pq
 
-    if isinstance(data_path, list):
-        paths = [os.path.expanduser(p) for p in data_path]
-    else:
-        paths = os.path.expanduser(data_path)
-
-    logger.info("Loading dataset from: %s", data_path)
-    if isinstance(paths, list):
-        import pyarrow as pa
-        tables = [pq.read_table(p) for p in paths]
-        table = pa.concat_tables(tables)
-    else:
-        table = pq.read_table(paths)
-    samples = table.to_pylist()
-
+    path = os.path.expanduser(data_path)
+    logger.info("Loading dataset from: %s", path)
+    samples = pq.read_table(path).to_pylist()
     for i, sample in enumerate(samples):
         samples[i] = _remap_sample_images(sample)
         _inject_reward_fields(samples[i])
-
     if max_samples > 0:
         samples = samples[:max_samples]
-        logger.info("Using first %d samples (max_samples=%d)", len(samples), max_samples)
-
-    logger.info("Loaded %d samples from %s", len(samples), data_path)
+    logger.info("Loaded %d samples", len(samples))
     return samples
 
 
-class _MockReplayBuffer:
-    """Minimal replay buffer for inference mode (no actual training)."""
-
-    def add(self, partition_id, items):
-        pass
+# =====================================================================
+# Config
+# =====================================================================
 
 
-def run_inference(
+def _load_config(
     *,
     model_path: str,
-    data_path: str,
-    prompt_length: int = 4096,
-    response_length: int = 65536,
-    temperature: float = 0.8,
-    top_p: float = 0.9,
-    n: int = 1,
-    max_samples: int = -1,
-    engine: str = "vllm",
-    nnodes: int = 1,
-    n_gpus_per_node: int = 8,
-    tensor_parallel_size: int = 4,
-    gateway_count: int = 1,
-    max_concurrent_sessions: int = 2,
-    completion_timeout: float = 600.0,
-    tool_parser: str | None = None,
-    agent_config_path: str | None = None,
-    runner: str = "uniagent",
-    tool_image: str | None = None,
-    run_timeout: int = 7200,
-) -> dict[str, Any]:
-    """Run parallel SWE-agent inference using the blackbox framework."""
-    if runner == "mini_swe":
-        if mini_swe_agent_runner is None:
-            raise ImportError("mini-swe-agent is required for --runner mini_swe. Install with: pip install mini-swe-agent")
-        _agent_runner = partial(
-            mini_swe_agent_runner,
-            tool_image=tool_image or "swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest",
-            run_timeout=run_timeout,
-        )
-    elif runner == "claude_code":
-        _agent_runner = partial(
-            claude_code_runner,
-            tool_image=tool_image or "claude-code-tool:latest",
-            run_timeout=run_timeout,
-        )
-    else:
-        _agent_runner = swe_agent_runner
+    engine: str,
+    prompt_length: int,
+    response_length: int,
+    temperature: float,
+    top_p: float,
+    n: int,
+    nnodes: int,
+    n_gpus_per_node: int,
+    tensor_parallel_size: int,
+    gateway_count: int,
+    max_concurrent_sessions: int,
+    tool_image: str | None,
+    run_timeout: int,
+) -> Any:
+    """Compose the recipe's training config and override inference fields.
 
-    if not ray.is_initialized():
-        ray.init()
+    The megatron/actor/optimizer sections are left untouched and never read.
+    """
+    from hydra import compose, initialize_config_dir
+    from omegaconf import OmegaConf
 
-    # 1. Init Hydra config
-    config = _init_hydra_config(
-        model_path=model_path,
-        engine=engine,
-        prompt_length=prompt_length,
-        response_length=response_length,
-        temperature=temperature,
-        top_p=top_p,
-        n=n,
-        nnodes=nnodes,
-        n_gpus_per_node=n_gpus_per_node,
-        tensor_parallel_size=tensor_parallel_size,
-    )
+    with initialize_config_dir(config_dir=_CONFIG_DIR, version_base=None):
+        config = compose(config_name=_CONFIG_NAME)
 
-    # 2. Load dataset
-    samples = load_swe_dataset(data_path, max_samples=max_samples)
-    logger.info(
-        "Loaded %d samples, %d rollout(s) each, runner=%s, gateway_count=%d, max_concurrent_sessions=%d",
-        len(samples),
-        n,
-        runner,
-        gateway_count,
-        max_concurrent_sessions,
-    )
+    OmegaConf.set_struct(config, False)
 
-    if not samples:
-        raise ValueError("No samples to process")
+    config.actor_rollout_ref.model.path = os.path.expanduser(model_path)
 
-    # 3. Create LLM server
-    logger.info("Initializing LLM server manager...")
-    llm_server_manager = LLMServerManager.create(config=config)
+    ro = config.actor_rollout_ref.rollout
+    ro.name = engine
+    ro.mode = "async"
+    ro.prompt_length = prompt_length
+    ro.response_length = response_length
+    ro.max_model_len = prompt_length + response_length + 1024
+    ro.max_num_batched_tokens = ro.max_model_len
+    ro.n = n
+    ro.temperature = temperature
+    ro.top_p = top_p
+    ro.tensor_model_parallel_size = tensor_parallel_size
+    ro.gpu_memory_utilization = float(os.getenv("ROLLOUT_GPU_MEM_UTIL", "0.7"))
+    ro.nnodes = nnodes
+    ro.n_gpus_per_node = n_gpus_per_node
+    ro.calculate_log_probs = True
+    ro.enable_sleep_mode = False
+
+    af = ro.custom.agent_framework
+    af.gateway_count = gateway_count
+    runner_name = next(iter(af.agent_runners.keys()))
+    runner_cfg = af.agent_runners[runner_name]
+    runner_cfg.max_concurrent_sessions = max_concurrent_sessions
+    if tool_image:
+        runner_cfg.runner_kwargs.tool_image = tool_image
+    runner_cfg.runner_kwargs.run_timeout = run_timeout
 
-    # 4. Create GatewayServingRuntime
-    logger.info("Using tool_parser=%r", tool_parser)
+    config.trainer.nnodes = nnodes
+    config.trainer.n_gpus_per_node = n_gpus_per_node
 
-    llm_client = llm_server_manager.get_client()
-    gateway_actor_kwargs = {
-        "tokenizer": hf_tokenizer(os.path.expanduser(model_path)),
-        "base_sampling_params": {"temperature": temperature, "top_p": top_p, "max_tokens": response_length},
-    }
-    if tool_parser:
-        gateway_actor_kwargs["tool_parser_name"] = tool_parser
-
-    gateway_runtime = GatewayServingRuntime(
-        llm_client=llm_client,
-        gateway_count=gateway_count,
-        gateway_actor_kwargs=gateway_actor_kwargs,
-    )
+    OmegaConf.set_struct(config, True)
+    return config
 
-    # 5. Create RewardLoopWorker for compute_score
-    from verl.experimental.reward_loop.reward_loop import RewardLoopWorker
-    reward_worker = ray.remote(RewardLoopWorker).remote(config, None)
 
-    # 6. Create framework
-    framework = SWEAgentFramework(
-        session_runtime=gateway_runtime,
-        agent_runner=_agent_runner,
-        replay_buffer=_MockReplayBuffer(),
-        rollout_config={"n": n, "val_kwargs": {"n": n}},
-        completion_timeout=completion_timeout,
-        wait_for_completion_after_agent_run=True,
-        max_concurrent_sessions=max_concurrent_sessions,
-        reward_loop_worker_handles=[reward_worker],
-    )
-
-    # 6. Build batch data and run
-    _tools_kwargs_list = []
-    for sample in samples:
-        tk = (sample.get("extra_info") or {}).get("tools_kwargs", {})
-        if runner == "uniagent" and agent_config_path:
-            tk["agent_config_path"] = agent_config_path
-        tk["model_path"] = os.path.expanduser(model_path)
-        _tools_kwargs_list.append(tk)
+# =====================================================================
+# Batch + score capture
+# =====================================================================
 
-    from tensordict import TensorDict
-    from verl.utils import tensordict_utils as _tu
 
+def _build_prompts(samples: list[dict[str, Any]]) -> tuple[Any, list[str]]:
     raw_prompts = [sample["prompt"] for sample in samples]
     uids = [str(uuid4()) for _ in samples]
-    td = TensorDict({"uid": uids, "global_steps": [0] * len(samples)}, batch_size=[len(samples)])
-    _tu.assign_non_tensor_stack(td, "raw_prompt", raw_prompts)
-    _tu.assign_non_tensor_stack(td, "tools_kwargs", _tools_kwargs_list)
-    _tu.assign_non_tensor_stack(td, "data_source", [sample["data_source"] for sample in samples])
-    _tu.assign_non_tensor_stack(td, "reward_model", [sample["reward_model"] for sample in samples])
+    tools_kwargs_list = [dict((sample.get("extra_info") or {}).get("tools_kwargs") or {}) for sample in samples]
+    prompts = tu.get_tensordict(
+        tensor_dict={
+            "raw_prompt": raw_prompts,
+            "uid": uids,
+            "data_source": [sample["data_source"] for sample in samples],
+            "reward_model": [sample["reward_model"] for sample in samples],
+            "tools_kwargs": tools_kwargs_list,
+        },
+        non_tensor_dict={"global_steps": 0},
+    )
+    return prompts, uids
 
-    batch = DataProto(batch=td, meta_info={}).repeat(n)
 
-    size_divisor = gateway_count
-    batch_padded, pad_size = pad_dataproto_to_divisor(batch, size_divisor)
-    logger.info("Starting %d agent session(s)...", len(batch_padded))
+def _install_tq_capture() -> tuple[dict[str, float], dict[str, str]]:
+    """Monkeypatch the process-local TransferQueue to capture rm_scores in-memory.
 
-    _tq_store: dict[str, Any] = {}
+    Runner dispatch is a Ray task, but session finalize/score/TQ-writes happen
+    in this driver process, so patching ``tq`` here captures every write.
+    """
+    captured_scores: dict[str, float] = {}
+    uid_status: dict[str, str] = {}
 
-    async def _dummy_kv_put(key, partition_id=None, tag=None, **kwargs):
-        _tq_store[key] = tag
+    async def _fake_put(*, key, partition_id=None, tag=None, **kwargs):
+        if isinstance(tag, dict) and "status" in tag:
+            uid_status[str(key)] = str(tag["status"])
 
-    async def _dummy_kv_batch_put(keys=None, fields=None, tags=None, partition_id=None, **kwargs):
+    async def _fake_batch_put(*, keys=None, fields=None, tags=None, partition_id=None, **kwargs):
+        if fields is None or keys is None or "rm_scores" not in fields:
+            return
+        rm = fields["rm_scores"]  # nested tensor; rm[i] is trajectory i's response scores
         for i, key in enumerate(keys):
-            _tq_store[key] = {"fields": fields, "tag": tags[i] if tags else None}
+            row = rm[i]
+            captured_scores[str(key)] = float(row[-1].item()) if row.numel() else 0.0
 
-    _tq_mock.async_kv_put = _dummy_kv_put
-    _tq_mock.async_kv_batch_put = _dummy_kv_batch_put
+    tq.async_kv_put = _fake_put
+    tq.async_kv_batch_put = _fake_batch_put
+    return captured_scores, uid_status
 
-    async def _generate():
-        return await framework.generate_sequences(batch_padded.batch)
 
-    try:
-        stats = asyncio.run(_generate())
-    except RuntimeError as e:
-        logger.warning("generate_sequences failed: %s", e)
-        stats = {}
-
-    # 7. Collect scores
-    uid_to_sample_idx = {uid: i for i, uid in enumerate(uids)}
-    per_sample_scores = [0.0] * len(samples)
-    sample_trajectory_counts = [0] * len(samples)
-    for key, value in _tq_store.items():
-        if not isinstance(value, dict) or "fields" not in value:
-            continue
-        fields = value["fields"]
-        rm_scores = fields.get("rm_scores", None)
-        if rm_scores is None:
-            continue
-        # Key format: {uid}_{session_index}_{index}
+def _report(samples, uids, captured_scores) -> dict[str, Any]:
+    uid_to_index = {uid: i for i, uid in enumerate(uids)}
+    per_sample_sum = [0.0] * len(samples)
+    per_sample_cnt = [0] * len(samples)
+    for key, score in captured_scores.items():
+        # key format: {uid}_{session_index}_{index}
         uid = key.rsplit("_", 2)[0]
-        sample_idx = uid_to_sample_idx.get(uid)
-        if sample_idx is None:
+        idx = uid_to_index.get(uid)
+        if idx is None:
             continue
-        score = float(rm_scores.float()[-1, -1].item())
-        per_sample_scores[sample_idx] += score
-        sample_trajectory_counts[sample_idx] += 1
-
-    for i in range(len(samples)):
-        if sample_trajectory_counts[i] > 0:
-            per_sample_scores[i] /= sample_trajectory_counts[i]
-
-    resolved_count = sum(1 for s in per_sample_scores if s > 0)
-    overall_mean = float(np.mean(per_sample_scores)) if per_sample_scores else 0.0
+        per_sample_sum[idx] += score
+        per_sample_cnt[idx] += 1
+    per_sample_scores = [
+        per_sample_sum[i] / per_sample_cnt[i] if per_sample_cnt[i] else 0.0 for i in range(len(samples))
+    ]
+    resolved = sum(1 for s in per_sample_scores if s > 0)
+    mean = float(np.mean(per_sample_scores)) if per_sample_scores else 0.0
     logger.info(
         "Resolved %d / %d samples (%.2f%%), mean score: %.4f",
-        resolved_count, len(samples), 100.0 * resolved_count / max(len(samples), 1), overall_mean,
+        resolved, len(samples), 100.0 * resolved / max(len(samples), 1), mean,
     )
-
-    # 8. Cleanup
-    asyncio.run(gateway_runtime.shutdown())
-
-    return {
-        "stats": stats,
-        "mean_score": overall_mean,
-        "per_sample_scores": per_sample_scores,
-    }
+    return {"resolved": resolved, "total": len(samples), "mean_score": mean, "per_sample_scores": per_sample_scores}
 
 
 # =====================================================================
-# Helpers
+# Runner
 # =====================================================================
 
 
-def _init_hydra_config(
+def run_inference(
     *,
     model_path: str,
-    engine: str,
+    data_path: str,
     prompt_length: int,
     response_length: int,
     temperature: float,
     top_p: float,
     n: int,
+    max_samples: int,
+    engine: str,
     nnodes: int,
     n_gpus_per_node: int,
     tensor_parallel_size: int,
-) -> Any:
-    """Initialize Hydra config with rollout/model settings."""
-    from hydra import compose, initialize_config_dir
-    from omegaconf import OmegaConf
+    gateway_count: int,
+    max_concurrent_sessions: int,
+    tool_image: str | None,
+    run_timeout: int,
+) -> dict[str, Any]:
+    if not ray.is_initialized():
+        ray.init()
 
-    config_dir = os.path.abspath("examples/swe_agent_blackbox/config")
-    with initialize_config_dir(config_dir=config_dir, version_base=None):
-        config = compose(config_name="parallel_infer")
+    config = _load_config(
+        model_path=model_path,
+        engine=engine,
+        prompt_length=prompt_length,
+        response_length=response_length,
+        temperature=temperature,
+        top_p=top_p,
+        n=n,
+        nnodes=nnodes,
+        n_gpus_per_node=n_gpus_per_node,
+        tensor_parallel_size=tensor_parallel_size,
+        gateway_count=gateway_count,
+        max_concurrent_sessions=max_concurrent_sessions,
+        tool_image=tool_image,
+        run_timeout=run_timeout,
+    )
 
-    config.actor_rollout_ref.model.path = os.path.expanduser(model_path)
-    config.actor_rollout_ref.rollout.name = engine
-    config.actor_rollout_ref.rollout.mode = "async"
-    config.actor_rollout_ref.rollout.prompt_length = prompt_length
-    config.actor_rollout_ref.rollout.response_length = response_length
-    config.actor_rollout_ref.rollout.max_model_len = prompt_length + response_length + 1024
-    config.actor_rollout_ref.rollout.n = n
-    config.actor_rollout_ref.rollout.tensor_model_parallel_size = tensor_parallel_size
-    config.actor_rollout_ref.rollout.gpu_memory_utilization = float(os.getenv("ROLLOUT_GPU_MEM_UTIL", "0.5"))
-    config.actor_rollout_ref.rollout.temperature = temperature
-    config.actor_rollout_ref.rollout.top_p = top_p
-    config.actor_rollout_ref.rollout.val_kwargs.temperature = temperature
-    config.actor_rollout_ref.rollout.val_kwargs.top_p = top_p
-    config.actor_rollout_ref.rollout.calculate_log_probs = True
-    config.actor_rollout_ref.rollout.multi_turn.max_assistant_turns = 100
-    config.actor_rollout_ref.rollout.multi_turn.max_parallel_calls = 1
-    config.actor_rollout_ref.rollout.nnodes = nnodes
-    config.actor_rollout_ref.rollout.n_gpus_per_node = n_gpus_per_node
-    config.trainer.nnodes = nnodes
-    config.trainer.n_gpus_per_node = n_gpus_per_node
+    samples = load_swe_dataset(data_path, max_samples=max_samples)
+    if not samples:
+        raise ValueError("No samples to process")
 
-    config.reward.custom_reward_function.path = "pkg://examples.swe_agent_blackbox.reward"
-    config.reward.custom_reward_function.name = "compute_score"
-    config.reward.num_workers = 1
+    logger.info("Initializing LLM server manager...")
+    llm_server_manager = LLMServerManager.create(config=config)
+    llm_client = llm_server_manager.get_client()
 
-    OmegaConf.set_struct(config.actor_rollout_ref.rollout, False)
-    config.actor_rollout_ref.rollout.enable_sleep_mode = False
-    config.actor_rollout_ref.rollout.enforce_eager = os.getenv("ROLLOUT_ENFORCE_EAGER", "0") == "1"
-    OmegaConf.set_struct(config.actor_rollout_ref.rollout, True)
-    return config
+    gateway_manager = build_gateway_manager(config=config, llm_client=llm_client)
+    reward_worker = ray.remote(RewardLoopWorker).remote(config, None)
+    framework = build_agent_framework(
+        config=config,
+        gateway_manager=gateway_manager,
+        reward_loop_worker_handles=[reward_worker],
+    )
+
+    prompts, uids = _build_prompts(samples)
+    captured_scores, _uid_status = _install_tq_capture()
+
+    logger.info("Starting %d sample(s), %d session(s) each...", len(samples), n)
+    try:
+        asyncio.run(framework.generate_sequences(prompts))
+    except RuntimeError as exc:
+        logger.warning("generate_sequences failed: %s", exc)
+
+    if not captured_scores:
+        logger.warning(
+            "No trajectory scores captured — all rollouts may have failed (see the "
+            "generate_sequences summary above), or the TransferQueue monkeypatch did not "
+            "reach the writer; resolve rate will be reported as 0."
+        )
+
+    result = _report(samples, uids, captured_scores)
+
+    asyncio.run(gateway_manager.shutdown())
+    return result
 
 
 # =====================================================================
-# CLI entry point
+# CLI
 # =====================================================================
 
 
 def main():
-    parser = argparse.ArgumentParser(description="SWE-Agent Blackbox Parallel Inference")
+    parser = argparse.ArgumentParser(description="Blackbox mini-swe-agent standalone inference")
+    parser.add_argument("--model-path", "--model", type=str, default="~/models/Qwen3.5-9B")
     parser.add_argument("--data-path", type=str, default="~/data/swe_agent/swe_bench_verified.parquet")
-    parser.add_argument("--model-path", "--model", type=str, default="~/models/Qwen3-Coder-30B-A3B-Instruct")
-    parser.add_argument("--max-turns", type=int, default=100)
+    parser.add_argument("--max-samples", type=int, default=-1)
     parser.add_argument("--prompt-length", type=int, default=4096)
-    parser.add_argument("--response-length", type=int, default=65536)
-    parser.add_argument("--temperature", type=float, default=0.8)
-    parser.add_argument("--top-p", type=float, default=0.9)
+    parser.add_argument("--response-length", type=int, default=131072)
+    parser.add_argument("--temperature", type=float, default=1.0)
+    parser.add_argument("--top-p", type=float, default=1.0)
     parser.add_argument("--n", type=int, default=1)
-    parser.add_argument("--max-samples", type=int, default=-1)
     parser.add_argument("--engine", type=str, default="vllm", choices=["vllm", "sglang"])
+    parser.add_argument("--tensor-parallel-size", "--tp", type=int, default=4)
     parser.add_argument("--nnodes", type=int, default=1)
     parser.add_argument("--n-gpus-per-node", type=int, default=8)
-    parser.add_argument("--tensor-parallel-size", "--tp", type=int, default=4)
     parser.add_argument("--gateway-count", type=int, default=1)
-    parser.add_argument("--max-concurrent-sessions", type=int, default=2)
-    parser.add_argument("--tool-parser", type=str, default="qwen3_coder")
-    parser.add_argument("--tool-image", type=str, default=None)
+    parser.add_argument("--max-concurrent-sessions", type=int, default=8)
+    parser.add_argument("--tool-image", type=str, default=_DEFAULT_TOOL_IMAGE)
     parser.add_argument("--run-timeout", type=int, default=7200)
-    parser.add_argument(
-        "--runner", type=str, default="uniagent", choices=["uniagent", "mini_swe", "claude_code"],
-        help="Agent runner: 'uniagent', 'mini_swe', or 'claude_code'.",
-    )
-    parser.add_argument(
-        "--agent-config-path", type=str,
-        default="examples/swe_agent_blackbox/config/agent_config.yaml",
-        help="Path to agent config YAML.",
-    )
+    parser.add_argument("--max-turns", type=int, default=100)
     args = parser.parse_args()
 
-    os.environ["SWE_AGENT_MAX_TURNS"] = str(args.max_turns)
+    # Set before ray.init so runner Ray tasks inherit it.
+    os.environ["AGENT_MAX_TURNS"] = str(args.max_turns)
 
     run_inference(
         model_path=args.model_path,
@@ -435,9 +370,6 @@ def main():
         tensor_parallel_size=args.tensor_parallel_size,
         gateway_count=args.gateway_count,
         max_concurrent_sessions=args.max_concurrent_sessions,
-        tool_parser=args.tool_parser,
-        agent_config_path=args.agent_config_path,
-        runner=args.runner,
         tool_image=args.tool_image,
         run_timeout=args.run_timeout,
     )
diff --git a/examples/blackbox_recipes/mini_swe_agent/reward.py b/examples/blackbox_recipes/mini_swe_agent/reward.py
index 61da218b..267cfea5 100644
--- a/examples/blackbox_recipes/mini_swe_agent/reward.py
+++ b/examples/blackbox_recipes/mini_swe_agent/reward.py
@@ -27,7 +27,7 @@ def build_reward_context(tools_kwargs: dict) -> tuple[dict[str, Any], int]:
 
 
 def compute_score(data_source: str, solution_str: str, ground_truth: str, extra_info=None) -> dict:
-    """Read reward_score from extra_info, injected by SWEAgentFramework."""
+    """Read reward_score from extra_info, injected by the agent runner."""
     score = 0.0
     if extra_info and "reward_score" in extra_info:
         score = float(extra_info["reward_score"])
diff --git a/examples/blackbox_recipes/mini_swe_agent/run_agent.py b/examples/blackbox_recipes/mini_swe_agent/run_agent.py
index c5a4b165..68406803 100644
--- a/examples/blackbox_recipes/mini_swe_agent/run_agent.py
+++ b/examples/blackbox_recipes/mini_swe_agent/run_agent.py
@@ -3,7 +3,7 @@
 
 Input:  task config JSON from **stdin**
     - task: str — the issue description for the agent to solve
-    - gateway_url: str — LLM gateway endpoint (tunnel URL for OpenYuanRong sandbox)
+    - gateway_url: str — LLM gateway endpoint (tunnel URL for remote sandbox)
     - agent: dict — agent config (e.g. step_limit)
 
 Output: agent result JSON to **stdout**, or error JSON on failure
@@ -12,7 +12,6 @@
 from __future__ import annotations
 
 import json
-import os
 import sys
 
 DEFAULT_ACTION_TIMEOUT = 600
@@ -45,8 +44,15 @@ def main() -> None:
         env_cfg["timeout"] = DEFAULT_ACTION_TIMEOUT
         env_cfg.setdefault("env", {})
         env_cfg["env"].setdefault("GIT_PAGER", "cat")
-        for key in ("image", "container_timeout", "run_args", "executable", "pull_timeout",
-                    "forward_env", "interpreter"):
+        for key in (
+            "image",
+            "container_timeout",
+            "run_args",
+            "executable",
+            "pull_timeout",
+            "forward_env",
+            "interpreter",
+        ):
             env_cfg.pop(key, None)
         env = LocalEnvironment(**env_cfg)
 
@@ -57,15 +63,17 @@ def main() -> None:
         model_defaults.pop("model_name", None)
         model_defaults.pop("model_kwargs", None)
         model_cfg = model_defaults
-        model_cfg.update({
-            "model_name": "openai/default",
-            "model_kwargs": {
-                "api_base": gateway_url,
-                "api_key": "not-needed",
-                "drop_params": True,
-            },
-            "cost_tracking": "ignore_errors",
-        })
+        model_cfg.update(
+            {
+                "model_name": "openai/default",
+                "model_kwargs": {
+                    "api_base": gateway_url,
+                    "api_key": "not-needed",
+                    "drop_params": True,
+                },
+                "cost_tracking": "ignore_errors",
+            }
+        )
         model = LitellmModel(**model_cfg)
 
         # 5. Create DefaultAgent
diff --git a/examples/blackbox_recipes/mini_swe_agent/run_infer.sh b/examples/blackbox_recipes/mini_swe_agent/run_infer.sh
new file mode 100755
index 00000000..2dfa997d
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/run_infer.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Standalone inference for the blackbox mini-swe-agent recipe.
+# Runs rollout + reward only (no Megatron trainer) and reports resolve rate.
+#
+# Usage:
+#   bash examples/blackbox_recipes/mini_swe_agent/run_infer.sh
+#
+# All configurable via environment variables (see defaults below).
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/../../.." && pwd)}"
+cd "${REPO_ROOT}"
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-${HOME}/models/Qwen3.5-9B}"
+DATA_PATH="${DATA_PATH:-${HOME}/data/swe_agent/swe_bench_verified.parquet}"
+
+# ── Inference parameters ─────────────────────────────────────────────────
+MAX_SAMPLES="${MAX_SAMPLES:--1}"
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+TOP_P="${TOP_P:-1.0}"
+N="${N:-1}"
+ENGINE="${ENGINE:-vllm}"
+TP="${TP:-4}"
+NNODES="${NNODES:-1}"
+N_GPUS_PER_NODE="${N_GPUS_PER_NODE:-8}"
+GATEWAY_COUNT="${GATEWAY_COUNT:-1}"
+MAX_CONCURRENT_SESSIONS="${MAX_CONCURRENT_SESSIONS:-8}"
+
+# ── Agent parameters ─────────────────────────────────────────────────────
+AGENT_MAX_TURNS="${AGENT_MAX_TURNS:-100}"
+SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+
+# ── AKernel (remote sandbox) ─────────────────────────────────────────────
+export AKERNEL_SERVER_ADDRESS="${AKERNEL_SERVER_ADDRESS:-}"
+export AKERNEL_TOKEN="${AKERNEL_TOKEN:-}"
+export AKERNEL_TUNNEL_SSL_VERIFY="${AKERNEL_TUNNEL_SSL_VERIFY:-0}"
+
+# ── Logging & env ────────────────────────────────────────────────────────
+export VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
+export ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
+export AGENT_MAX_TURNS
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
+export PYTHONPATH="${REPO_ROOT}:${REPO_ROOT}/verl:${PYTHONPATH:-}"
+
+echo "=== Mini-SWE-Agent Blackbox Inference ==="
+echo "Model:       ${MODEL_PATH}"
+echo "Data:        ${DATA_PATH}"
+echo "Max samples: ${MAX_SAMPLES}"
+echo "Engine:      ${ENGINE} (TP=${TP})"
+echo "Tool image:  ${SWE_AGENT_TOOL_IMAGE}"
+echo "Batch:       n=${N}, gateway=${GATEWAY_COUNT}, max_sessions=${MAX_CONCURRENT_SESSIONS}"
+if [[ -n "${GATEWAY_MESSAGE_JSONL_PATH:-}" ]]; then  # optional var; ':-' avoids a 'set -u' abort when unset
+    echo "Messages:    ${GATEWAY_MESSAGE_JSONL_PATH}"
+fi
+echo "========================================="
+
+python examples/blackbox_recipes/mini_swe_agent/parallel_infer.py \
+    --model-path "${MODEL_PATH}" \
+    --data-path "${DATA_PATH}" \
+    --max-samples "${MAX_SAMPLES}" \
+    --prompt-length "${PROMPT_LENGTH}" \
+    --response-length "${RESPONSE_LENGTH}" \
+    --temperature "${TEMPERATURE}" \
+    --top-p "${TOP_P}" \
+    --n "${N}" \
+    --engine "${ENGINE}" \
+    --tensor-parallel-size "${TP}" \
+    --nnodes "${NNODES}" \
+    --n-gpus-per-node "${N_GPUS_PER_NODE}" \
+    --gateway-count "${GATEWAY_COUNT}" \
+    --max-concurrent-sessions "${MAX_CONCURRENT_SESSIONS}" \
+    --tool-image "${SWE_AGENT_TOOL_IMAGE}" \
+    --run-timeout "${SWE_AGENT_RUN_TIMEOUT}" \
+    --max-turns "${AGENT_MAX_TURNS}"
diff --git a/examples/blackbox_recipes/mini_swe_agent/run_train.sh b/examples/blackbox_recipes/mini_swe_agent/run_train.sh
new file mode 100755
index 00000000..75a042ba
--- /dev/null
+++ b/examples/blackbox_recipes/mini_swe_agent/run_train.sh
@@ -0,0 +1,300 @@
+#!/usr/bin/env bash
+# Megatron + V1 async training for the blackbox mini-swe recipe.
+#
+# Uses verl.trainer.main_ppo with the V1 unified trainer. The default mode is
+# separate_async, which uses separate trainer and rollout GPU pools.
+#
+# Usage:
+#   bash examples/blackbox_recipes/mini_swe_agent/run_train.sh
+#
+# All configurable via environment variables (see defaults below).
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/../../.." && pwd)}"
+cd "${REPO_ROOT}"
+
+# ── Model & data ─────────────────────────────────────────────────────────
+MODEL_PATH="${MODEL_PATH:-${HOME}/models/Qwen3.5-9B}"
+TRAIN_DATA="${TRAIN_DATA:-${HOME}/data/swe_agent/swe_rebench_filtered.parquet}"
+VAL_DATA="${VAL_DATA:-${HOME}/data/swe_agent/swe_bench_verified.parquet}"
+RUNTIME_ENV="${RUNTIME_ENV:-}"
+
+# ── V1 trainer ───────────────────────────────────────────────────────────
+TRAINER_MODE="${TRAINER_MODE:-separate_async}"
+NUM_WARMUP_BATCHES="${NUM_WARMUP_BATCHES:-1}"
+SEPARATE_NUM_WARMUP_BATCHES="${SEPARATE_NUM_WARMUP_BATCHES:-${NUM_WARMUP_BATCHES}}"
+PARAMETER_SYNC_STEP="${PARAMETER_SYNC_STEP:-4}"
+RAY_SUBMIT_MODE="${RAY_SUBMIT_MODE:-job}"
+RAY_INIT_ADDRESS="${RAY_INIT_ADDRESS:-auto}"
+RAY_STATUS_TIMEOUT="${RAY_STATUS_TIMEOUT:-5}"
+CONFIG_NAME="${CONFIG_NAME:-swe_agent_blackbox_megatron_v1}"
+
+# ── Hardware ─────────────────────────────────────────────────────────────
+NNODES="${NNODES:-${NNODES_TRAIN:-1}}"
+PHYSICAL_GPUS_PER_NODE="${PHYSICAL_GPUS_PER_NODE:-8}"
+if [[ "${TRAINER_MODE}" == "separate_async" ]]; then
+    N_GPUS_PER_NODE="${N_GPUS_PER_NODE:-${TRAIN_NGPUS_PER_NODE:-4}}"
+    ROLLOUT_NNODES="${ROLLOUT_NNODES:-${NNODES_ROLLOUT:-${NNODES}}}"
+    ROLLOUT_NGPUS_PER_NODE="${ROLLOUT_NGPUS_PER_NODE:-${NGPUS_PER_NODE_ROLLOUT:-4}}"
+else
+    N_GPUS_PER_NODE="${N_GPUS_PER_NODE:-${TRAIN_NGPUS_PER_NODE:-${PHYSICAL_GPUS_PER_NODE}}}"
+    ROLLOUT_NNODES="${ROLLOUT_NNODES:-${NNODES_ROLLOUT:-0}}"
+    ROLLOUT_NGPUS_PER_NODE="${ROLLOUT_NGPUS_PER_NODE:-${NGPUS_PER_NODE_ROLLOUT:-${N_GPUS_PER_NODE}}}"
+fi
+
+# ── Algorithm ────────────────────────────────────────────────────────────
+CLIP_RATIO_LOW="${CLIP_RATIO_LOW:-0.2}"
+CLIP_RATIO_HIGH="${CLIP_RATIO_HIGH:-0.28}"
+ACTOR_LR="${ACTOR_LR:-1e-6}"
+
+# ── Sequence lengths ─────────────────────────────────────────────────────
+PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
+RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
+MAX_MODEL_LEN=$((PROMPT_LENGTH + RESPONSE_LENGTH))
+
+# ── Rollout parameters ───────────────────────────────────────────────────
+ENGINE="${ENGINE:-vllm}"
+if [[ "${TRAINER_MODE}" == "separate_async" ]]; then
+    GEN_TP="${GEN_TP:-${TP:-${ROLLOUT_NGPUS_PER_NODE}}}"
+else
+    GEN_TP="${GEN_TP:-${TP:-2}}"
+fi
+N="${N:-8}"
+TEMPERATURE="${TEMPERATURE:-1.0}"
+TOP_P="${TOP_P:-1.0}"
+TOP_K="${TOP_K:--1}"
+ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
+UPDATE_WEIGHTS_BUCKET_MB="${UPDATE_WEIGHTS_BUCKET_MB:-2048}"
+
+# ── Megatron training parallelism ────────────────────────────────────────
+if [[ "${TRAINER_MODE}" == "separate_async" ]]; then
+    TRAIN_TP="${TRAIN_TP:-${TP:-${N_GPUS_PER_NODE}}}"
+else
+    TRAIN_TP="${TRAIN_TP:-${TP:-8}}"
+fi
+TRAIN_PP="${TRAIN_PP:-1}"
+TRAIN_CP="${TRAIN_CP:-1}"
+OFFLOAD="${OFFLOAD:-True}"
+OPTIMIZER_OFFLOAD_FRACTION="${OPTIMIZER_OFFLOAD_FRACTION:-${OFFLOAD_FRACTION:-1.0}}"  # OFFLOAD_FRACTION kept as fallback alias
+USE_MBRIDGE="${USE_MBRIDGE:-True}"
+PPO_MINI_BATCH_SIZE="${PPO_MINI_BATCH_SIZE:-16}"
+
+# ── Agent parameters ─────────────────────────────────────────────────────
+# AGENT_MAX_TURNS is the agent's turn budget inside the sandbox: it becomes the
+# mini-swe-agent step_limit (read by the runner via the AGENT_MAX_TURNS env var).
+# Note: the trainer's multi_turn.max_assistant_turns is NOT enforced on the
+# blackbox rollout path (AgentFrameworkRolloutAdapter), so it is not exposed here.
+RUNNER="${RUNNER:-mini_swe}"
+AGENT_MAX_TURNS="${AGENT_MAX_TURNS:-100}"
+if [[ "${RUNNER}" == "mini_swe" ]]; then
+    AGENT_RUNNER_FQN="examples.blackbox_recipes.mini_swe_agent.mini_swe_agent_runner.mini_swe_agent_runner"
+    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
+else
+    echo "Unknown RUNNER=${RUNNER}; this recipe currently supports mini_swe only" >&2
+    exit 1
+fi
+SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
+CONDA_ENV="${CONDA_ENV:-testbed}"
+GATEWAY_COUNT="${GATEWAY_COUNT:-1}"
+MAX_CONCURRENT_SESSIONS="${MAX_CONCURRENT_SESSIONS:-32}"
+NUM_AGENT_WORKERS="${NUM_AGENT_WORKERS:-8}"
+RUNNER_ARGS=(
+    "actor_rollout_ref.rollout.agent.agent_loop_manager_class=uni_agent.framework.entry.AgentFrameworkRolloutAdapter"
+    "actor_rollout_ref.rollout.custom.agent_framework.gateway_count=${GATEWAY_COUNT}"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.runner_fqn=${AGENT_RUNNER_FQN}"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.dispatch_mode=ray_task"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.max_concurrent_sessions=${MAX_CONCURRENT_SESSIONS}"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
+    "actor_rollout_ref.rollout.custom.agent_framework.agent_runners.swe_agent.runner_kwargs.conda_env=${CONDA_ENV}"
+)
+
+# ── AKernel (remote sandbox) ─────────────────────────────────────────────
+AKERNEL_SERVER_ADDRESS="${AKERNEL_SERVER_ADDRESS:-}"
+AKERNEL_TOKEN="${AKERNEL_TOKEN:-}"
+AKERNEL_TUNNEL_SSL_VERIFY="${AKERNEL_TUNNEL_SSL_VERIFY:-0}"
+
+# ── Logging & checkpointing ──────────────────────────────────────────────
+PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
+EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
+SAVE_FREQ="${SAVE_FREQ:-10}"
+TEST_FREQ="${TEST_FREQ:-10}"
+TOTAL_EPOCHS="${TOTAL_EPOCHS:-10}"
+TOTAL_TRAINING_STEPS="${TOTAL_TRAINING_STEPS:-}"
+VAL_BEFORE_TRAIN="${VAL_BEFORE_TRAIN:-true}"
+CKPTS_DIR="${CKPTS_DIR:-checkpoints/${PROJECT_NAME}/${EXPERIMENT_NAME}}"
+TRAIN_MAX_SAMPLES="${TRAIN_MAX_SAMPLES:-${MAX_SAMPLES:--1}}"
+VAL_MAX_SAMPLES="${VAL_MAX_SAMPLES:-${MAX_SAMPLES:--1}}"
+TRAIN_BATCH_SIZE="${TRAIN_BATCH_SIZE:-${PPO_MINI_BATCH_SIZE}}"
+VAL_BATCH_SIZE="${VAL_BATCH_SIZE:-${TRAIN_BATCH_SIZE}}"
+
+export AGENT_MAX_TURNS
+export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
+export SWE_AGENT_TOOL_IMAGE
+export SWE_AGENT_RUN_TIMEOUT
+export CONDA_ENV
+export GATEWAY_COUNT
+export AKERNEL_SERVER_ADDRESS
+export AKERNEL_TOKEN
+export AKERNEL_TUNNEL_SSL_VERIFY
+export PYTHONPATH="${REPO_ROOT}:${REPO_ROOT}/verl:${PYTHONPATH:-}"
+
+echo "=== SWE-Agent Blackbox Megatron Async Training ==="
+echo "Model:       ${MODEL_PATH}"
+echo "Train data:  ${TRAIN_DATA}"
+echo "Val data:    ${VAL_DATA}"
+echo "Engine:      ${ENGINE} (gen_tp=${GEN_TP}, train_tp=${TRAIN_TP})"
+echo "Runner:      ${RUNNER}"
+echo "Turns:       agent_max_turns=${AGENT_MAX_TURNS}"
+echo "Batch:       n=${N}, mini_bsz=${PPO_MINI_BATCH_SIZE}"
+echo "Sequence:    prompt=${PROMPT_LENGTH}, response=${RESPONSE_LENGTH}"
+echo "Trainer:     V1 ${TRAINER_MODE}"
+if [[ "${TRAINER_MODE}" == "separate_async" ]]; then
+    echo "Resources:   trainer=${NNODES}x${N_GPUS_PER_NODE}, rollout=${ROLLOUT_NNODES}x${ROLLOUT_NGPUS_PER_NODE}"
+else
+    echo "Resources:   colocated=${NNODES}x${N_GPUS_PER_NODE}"
+fi
+echo "Samples:     train_max=${TRAIN_MAX_SAMPLES}, val_max=${VAL_MAX_SAMPLES}"
+echo "==================================================="
+
+# ── Compute derived parameters ───────────────────────────────────────────
+ACTOR_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
+INFER_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
+
+RUNTIME_ENV_ARGS=()
+if [ -n "${RUNTIME_ENV}" ]; then
+    RUNTIME_ENV_ARGS=(--runtime-env "${RUNTIME_ENV}")
+else
+    RUNTIME_ENV_JSON="$(
+        python3 - <<'PY'
+import json
+import os
+
+env_vars = {
+    key: value
+    for key in (
+        "PYTHONPATH",
+        "AKERNEL_SERVER_ADDRESS",
+        "AKERNEL_TOKEN",
+        "AKERNEL_TUNNEL_SSL_VERIFY",
+        "AGENT_MAX_TURNS",
+        "SWE_AGENT_EVAL_TIMEOUT",
+        "SWE_AGENT_TOOL_IMAGE",
+        "SWE_AGENT_RUN_TIMEOUT",
+        "CONDA_ENV",
+        "GATEWAY_COUNT",
+    )
+    if (value := os.environ.get(key)) is not None
+}
+env_vars.setdefault("TRANSFER_QUEUE_ENABLE", "")
+env_vars.setdefault("NCCL_P2P_DISABLE", "1")
+env_vars.setdefault("NCCL_SHM_DISABLE", "1")
+print(json.dumps({"env_vars": env_vars}))
+PY
+    )"
+    RUNTIME_ENV_ARGS=(--runtime-env-json "${RUNTIME_ENV_JSON}")
+fi
+
+# ── Ensure Ray is running ────────────────────────────────────────────────
+if [[ "${TRAINER_MODE}" == "separate_async" ]]; then
+    TOTAL_GPUS=$(( NNODES * N_GPUS_PER_NODE + ROLLOUT_NNODES * ROLLOUT_NGPUS_PER_NODE ))
+else
+    TOTAL_GPUS=$(( NNODES * N_GPUS_PER_NODE ))
+fi
+if ! timeout "${RAY_STATUS_TIMEOUT}" ray status &>/dev/null; then
+    echo "Starting Ray cluster (${TOTAL_GPUS} GPUs)..."
+    ray start --head --num-gpus="${TOTAL_GPUS}" --disable-usage-stats
+else
+    echo "Ray cluster already running."
+fi
+
+# ── Launch ────────────────────────────────────────────────────────────────
+WORKING_DIR="${WORKING_DIR:-$(pwd)}"
+
+MAIN_CMD=(
+    python3 -m verl.trainer.main_ppo
+    --config-name="${CONFIG_NAME}" \
+    --config-path="${REPO_ROOT}/examples/blackbox_recipes/mini_swe_agent/config" \
+    hydra.searchpath=[pkg://verl.trainer.config] \
+    +ray_kwargs.ray_init.address="${RAY_INIT_ADDRESS}" \
+    trainer.use_v1=True \
+    trainer.v1.trainer_mode="${TRAINER_MODE}" \
+    trainer.v1.colocate_async.num_warmup_batches=${NUM_WARMUP_BATCHES} \
+    trainer.v1.separate_async.num_warmup_batches=${SEPARATE_NUM_WARMUP_BATCHES} \
+    trainer.v1.separate_async.parameter_sync_step=${PARAMETER_SYNC_STEP} \
+    transfer_queue.enable=True \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    data.train_files="['${TRAIN_DATA}']" \
+    data.val_files="['${VAL_DATA}']" \
+    data.train_max_samples=${TRAIN_MAX_SAMPLES} \
+    data.val_max_samples=${VAL_MAX_SAMPLES} \
+    data.train_batch_size=${TRAIN_BATCH_SIZE} \
+    data.val_batch_size=${VAL_BATCH_SIZE} \
+    data.max_prompt_length=${PROMPT_LENGTH} \
+    data.max_response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.n=${N} \
+    actor_rollout_ref.rollout.name=${ENGINE} \
+    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
+    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
+    actor_rollout_ref.rollout.max_model_len=${MAX_MODEL_LEN} \
+    actor_rollout_ref.rollout.max_num_batched_tokens=${MAX_MODEL_LEN} \
+    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
+    actor_rollout_ref.rollout.top_p=${TOP_P} \
+    actor_rollout_ref.rollout.top_k=${TOP_K} \
+    actor_rollout_ref.rollout.checkpoint_engine.update_weights_bucket_megabytes=${UPDATE_WEIGHTS_BUCKET_MB} \
+    actor_rollout_ref.rollout.nnodes=${ROLLOUT_NNODES} \
+    actor_rollout_ref.rollout.n_gpus_per_node=${ROLLOUT_NGPUS_PER_NODE} \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${GEN_TP} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
+    actor_rollout_ref.rollout.agent.num_workers=${NUM_AGENT_WORKERS} \
+    "${RUNNER_ARGS[@]}" \
+    actor_rollout_ref.actor.clip_ratio_low=${CLIP_RATIO_LOW} \
+    actor_rollout_ref.actor.clip_ratio_high=${CLIP_RATIO_HIGH} \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${ACTOR_PPO_MAX_TOKEN_LEN} \
+    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=${OPTIMIZER_OFFLOAD_FRACTION} \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.overlap_cpu_optimizer_d2h_h2d=True \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True \
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_cpu_offload=True \
+    actor_rollout_ref.actor.megatron.param_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.grad_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.optimizer_offload=${OFFLOAD} \
+    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TRAIN_TP} \
+    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
+    actor_rollout_ref.actor.megatron.context_parallel_size=${TRAIN_CP} \
+    actor_rollout_ref.actor.megatron.use_mbridge=${USE_MBRIDGE} \
+    actor_rollout_ref.ref.megatron.param_offload=${OFFLOAD} \
+    actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${TRAIN_TP} \
+    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
+    actor_rollout_ref.ref.megatron.context_parallel_size=${TRAIN_CP} \
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
+    trainer.project_name="${PROJECT_NAME}" \
+    trainer.experiment_name="${EXPERIMENT_NAME}" \
+    trainer.total_epochs=${TOTAL_EPOCHS} \
+    trainer.val_before_train=${VAL_BEFORE_TRAIN} \
+    trainer.save_freq=${SAVE_FREQ} \
+    trainer.test_freq=${TEST_FREQ} \
+    trainer.default_local_dir="${CKPTS_DIR}" \
+    trainer.nnodes=${NNODES} \
+    trainer.n_gpus_per_node=${N_GPUS_PER_NODE} \
+    "$@"
+)
+
+if [[ -n "${TOTAL_TRAINING_STEPS}" ]]; then
+    MAIN_CMD+=(trainer.total_training_steps=${TOTAL_TRAINING_STEPS})
+fi
+
+if [[ "${RAY_SUBMIT_MODE}" == "job" ]]; then
+    ray job submit --no-wait --working-dir="${WORKING_DIR}" "${RUNTIME_ENV_ARGS[@]}" -- "${MAIN_CMD[@]}"
+elif [[ "${RAY_SUBMIT_MODE}" == "local" ]]; then
+    "${MAIN_CMD[@]}"
+else
+    echo "Unknown RAY_SUBMIT_MODE=${RAY_SUBMIT_MODE}; expected job or local" >&2
+    exit 1
+fi
diff --git a/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py b/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
deleted file mode 100644
index b03dc7d7..00000000
--- a/examples/blackbox_recipes/mini_swe_agent/subprocess_runner.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Ray-based subprocess runner for agent_runner execution.
-
-Launches agent_runner in a separate Ray worker process to prevent blocking
-operations (sleep, sync I/O, etc.) from stalling the framework's event loop.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-from typing import Any
-
-import ray
-
-from uni_agent.trainer.framework.types import SessionHandle
-
-logger = logging.getLogger(__name__)
-
-
-class _StubSessionRuntime:
-    """Captures reward_info from agent_runner's complete_session call."""
-
-    def __init__(self):
-        self.reward_info: dict[str, Any] | None = None
-
-    async def complete_session(self, session_id: str, reward_info: dict[str, Any] | None = None):
-        self.reward_info = reward_info
-
-
-@ray.remote(num_cpus=0)
-def remote_agent_run(
-    agent_runner_fqn: str,
-    raw_prompt,
-    session_id: str,
-    base_url: str,
-    sample_index: int,
-    runner_kwargs: dict,
-) -> dict[str, Any] | None:
-    """Run agent_runner in a dedicated Ray worker process."""
-    from verl.utils.import_utils import load_class_from_fqn
-
-    agent_runner = load_class_from_fqn(agent_runner_fqn)
-    stub_runtime = _StubSessionRuntime()
-    handle = SessionHandle(session_id=session_id, base_url=base_url)
-
-    async def _run():
-        try:
-            await agent_runner(
-                raw_prompt=raw_prompt,
-                session=handle,
-                sample_index=sample_index,
-                session_runtime=stub_runtime,
-                **runner_kwargs,
-            )
-            return stub_runtime.reward_info
-        except Exception as e:
-            logger.error("remote_agent_run failed: session_id=%s, sample=%d, error=%s",
-                         session_id, sample_index, e, exc_info=True)
-            raise
-
-    return asyncio.run(_run())
diff --git a/examples/blackbox_recipes/sandbox/sandbox.py b/examples/blackbox_recipes/sandbox_client.py
similarity index 66%
rename from examples/blackbox_recipes/sandbox/sandbox.py
rename to examples/blackbox_recipes/sandbox_client.py
index e6e46a8d..631c8722 100644
--- a/examples/blackbox_recipes/sandbox/sandbox.py
+++ b/examples/blackbox_recipes/sandbox_client.py
@@ -1,5 +1,7 @@
-"""OpenYuanRong (AKernel) remote sandbox command execution.
+"""AKernel remote sandbox command execution.
 
+AKernel is an agent sandbox infra collaboratively developed by the
+OpenYuanrong team and the Ant AKernel team.
 Uses ``akernel_sdk.Sandbox`` with sidecar ``Mount`` to inject the
 mini-swe-agent tool image.  Supports upstream tunnel so the agent
 inside the sandbox can reach the gateway via ``http://127.0.0.1:<proxy_port>``.
@@ -10,6 +12,7 @@
 import asyncio
 import logging
 import os
+import uuid
 from dataclasses import dataclass
 from typing import Any
 from urllib.parse import urlparse
@@ -23,24 +26,31 @@ class CommandResult:
     stderr: str
     exit_code: int
 
+
 logger = logging.getLogger(__name__)
 
 DEFAULT_PROXY_PORT = 38197
 
 
 def _configure_akernel_env() -> None:
-    """Map OPENYUANRONG_* env vars to AKERNEL_* before importing akernel_sdk."""
-    server = os.getenv("OPENYUANRONG_SERVER_ADDRESS")
-    token = os.getenv("OPENYUANRONG_TOKEN")
-    tunnel_ssl_verify = os.getenv("OPENYUANRONG_TUNNEL_SSL_VERIFY", "0")
+    """Validate AKernel credentials and map the tunnel SSL flag for akernel_sdk.
+
+    ``akernel_sdk`` reads ``AKERNEL_SERVER_ADDRESS`` / ``AKERNEL_TOKEN`` directly,
+    so only the tunnel SSL flag needs to be translated to ``TUNNEL_SSL_VERIFY``.
+    """
+    server = os.getenv("AKERNEL_SERVER_ADDRESS")
+    token = os.getenv("AKERNEL_TOKEN")
     if not server or not token:
-        raise ValueError(
-            "OPENYUANRONG_SERVER_ADDRESS and OPENYUANRONG_TOKEN "
-            "environment variables must be set for YR sandbox"
-        )
-    os.environ["AKERNEL_SERVER_ADDRESS"] = server
-    os.environ["AKERNEL_TOKEN"] = token
-    os.environ["TUNNEL_SSL_VERIFY"] = tunnel_ssl_verify
+        raise ValueError("AKERNEL_SERVER_ADDRESS and AKERNEL_TOKEN environment variables must be set for sandbox")
+    os.environ["TUNNEL_SSL_VERIFY"] = os.getenv("AKERNEL_TUNNEL_SSL_VERIFY", "0")
+
+
+def _resolve_sandbox_name() -> str | None:
+    """Return ``SANDBOX_NAME_PREFIX`` + 8 random hex chars, or ``None`` if the env var is unset or empty."""
+    prefix = os.getenv("SANDBOX_NAME_PREFIX")
+    if not prefix:
+        return None
+    return f"{prefix}{uuid.uuid4().hex[:8]}"
 
 
 def extract_upstream(gateway_url: str) -> str:
@@ -71,8 +81,8 @@ def rewrite_gateway_url(
     return f"http://127.0.0.1:{proxy_port}{path}"
 
 
-class YRSandbox:
-    """Command execution via OpenYuanRong (AKernel) remote sandbox."""
+class SandboxClient:
+    """Command execution via remote sandbox."""
 
     def __init__(self, sandbox: Any) -> None:
         self._sandbox = sandbox
@@ -81,7 +91,6 @@ def __init__(self, sandbox: Any) -> None:
     def sandbox_id(self) -> str:
         return getattr(self._sandbox, "sandbox_id", "unknown")
 
-
     @classmethod
     async def create(
         cls,
@@ -97,10 +106,10 @@ async def create(
         mem_limit: int = 8192,
         idle_timeout: int = 7200,
         sidecar_target: str = "/opt/mini-swe-agent",
-        max_retries: int = 5,
+        max_retries: int = 10,
         **sandbox_kwargs: Any,
-    ) -> "YRSandbox":
-        """Create an OpenYuanRong sandbox with sidecar tool mounted.
+    ) -> SandboxClient:
+        """Create a sandbox client with sidecar tool mounted.
 
         The sidecar image is mounted at ``sidecar_target`` inside the
         sandbox via ``akernel_sdk.Mount``.
@@ -127,25 +136,35 @@ async def create(
             sb_kwargs["proxy_port"] = proxy_port
         if env:
             sb_kwargs["env"] = env
+        name = _resolve_sandbox_name()
+        if name is not None:
+            sb_kwargs["name"] = name
         sb_kwargs.update(sandbox_kwargs)
 
         logger.info(
-            "Creating YR sandbox (image=%s, cpu=%d, memory=%d, sidecar=%s:%s, upstream=%s)",
-            image, cpu, memory, sidecar_image, sidecar_target, upstream or "none",
+            "Creating sandbox (image=%s, cpu=%d, memory=%d, sidecar=%s:%s, upstream=%s, name=%s)",
+            image,
+            cpu,
+            memory,
+            sidecar_image,
+            sidecar_target,
+            upstream or "none",
+            name or "auto",
         )
         last_error: Exception | None = None
         for retry in range(max_retries):
             sandbox = None
             try:
                 sandbox = await asyncio.to_thread(lambda: Sandbox(**sb_kwargs))
-                logger.info("YR sandbox created: %s", getattr(sandbox, "sandbox_id", "?"))
+                logger.info("sandbox created: %s", getattr(sandbox, "sandbox_id", "?"))
                 return cls(sandbox=sandbox)
             except Exception as exc:
                 last_error = exc
                 sandbox_id = getattr(sandbox, "sandbox_id", None)
                 logger.critical(
-                    "Failed to create YR sandbox (sandbox_id=%s): %s",
-                    sandbox_id or "n/a", exc,
+                    "Failed to create sandbox (sandbox_id=%s): %s",
+                    sandbox_id or "n/a",
+                    exc,
                 )
                 if sandbox is not None:
                     try:
@@ -153,17 +172,19 @@ async def create(
                     except Exception:
                         pass
                 if retry < max_retries - 1:
-                    sleep_time = min(30, 2 ** retry)
-                    logger.info("Retrying YR sandbox creation in %d seconds...", sleep_time)
+                    sleep_time = min(30, 2**retry)
+                    logger.info("Retrying sandbox creation in %d seconds...", sleep_time)
                     await asyncio.sleep(sleep_time)
 
-        raise RuntimeError(f"Failed to create YR sandbox after {max_retries} retries") from last_error
+        raise RuntimeError(f"Failed to create sandbox after {max_retries} retries") from last_error
 
     async def run(self, cmd: str, *, timeout: int = 600) -> CommandResult:
-        """Execute *cmd* inside the OpenYuanRong sandbox via ``sandbox.commands.run``."""
+        """Execute *cmd* inside the sandbox via ``sandbox.commands.run``."""
         try:
             result = await asyncio.to_thread(
-                self._sandbox.commands.run, cmd, timeout=timeout,
+                self._sandbox.commands.run,
+                cmd,
+                timeout=timeout,
             )
             return CommandResult(
                 stdout=getattr(result, "stdout", ""),
@@ -174,15 +195,15 @@ async def run(self, cmd: str, *, timeout: int = 600) -> CommandResult:
             return CommandResult(stdout="", stderr=str(e), exit_code=-1)
 
     async def cleanup(self) -> None:
-        """Kill the OpenYuanRong sandbox if still running."""
+        """Kill the sandbox if still running."""
         if self._sandbox is not None:
             sandbox_id = getattr(self._sandbox, "sandbox_id", "?")
             try:
                 if self._sandbox.is_running():
                     await asyncio.to_thread(self._sandbox.kill)
-                    logger.info("YR sandbox %s killed", sandbox_id)
+                    logger.info("sandbox %s killed", sandbox_id)
                 else:
-                    logger.info("YR sandbox %s already stopped", sandbox_id)
+                    logger.info("sandbox %s already stopped", sandbox_id)
             except Exception as e:
-                logger.warning("Failed to kill YR sandbox %s: %s", sandbox_id, e)
+                logger.warning("Failed to kill sandbox %s: %s", sandbox_id, e)
             self._sandbox = None
diff --git a/examples/blackbox_recipes/scripts/build_tool.sh b/examples/blackbox_recipes/scripts/build_tool.sh
deleted file mode 100755
index e5158629..00000000
--- a/examples/blackbox_recipes/scripts/build_tool.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-# Build a SWE blackbox sidecar tool image.
-#
-# Usage:
-#   bash examples/swe_agent_blackbox/build_tool.sh
-#   bash examples/swe_agent_blackbox/build_tool.sh --tool claude_code
-#   bash examples/swe_agent_blackbox/build_tool.sh --pip-index https://pypi.tuna.tsinghua.edu.cn/simple/
-#   bash examples/swe_agent_blackbox/build_tool.sh --npm-registry https://registry.npmmirror.com
-#   bash examples/swe_agent_blackbox/build_tool.sh --tool-version latest
-#   bash examples/swe_agent_blackbox/build_tool.sh --registry reg.antgroup-inc.cn/myrepo
-#
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-TOOL_KIND="${TOOL_KIND:-mini_swe}"
-IMAGE_TAG="${TOOL_TAG:-latest}"
-TOOL_VERSION="${TOOL_VERSION:-latest}"
-
-# Parse args
-REGISTRY=""
-PIP_INDEX_URL="${PIP_INDEX_URL:-}"
-NPM_REGISTRY="${NPM_REGISTRY:-}"
-while [[ $# -gt 0 ]]; do
-    case "$1" in
-        --tool) TOOL_KIND="$2"; shift 2 ;;
-        --registry) REGISTRY="$2"; shift 2 ;;
-        --pip-index) PIP_INDEX_URL="$2"; shift 2 ;;
-        --npm-registry) NPM_REGISTRY="$2"; shift 2 ;;
-        --tool-version) TOOL_VERSION="$2"; shift 2 ;;
-        *) echo "Unknown arg: $1"; exit 1 ;;
-    esac
-done
-
-BUILD_ARGS=()
-DOCKERFILE="${SCRIPT_DIR}/Dockerfile.mini-swe-agent-tool"
-if [[ "${TOOL_KIND}" == "claude" ]]; then
-    TOOL_KIND="claude_code"
-fi
-if [[ "${TOOL_KIND}" == "claude_code" ]]; then
-    IMAGE_NAME="${TOOL_IMAGE:-claude-code-tool}"
-    DOCKERFILE="${SCRIPT_DIR}/Dockerfile.claude-code-tool"
-    BUILD_ARGS+=(--build-arg "TOOL_VERSION=${TOOL_VERSION}")
-    if [[ -n "${NPM_REGISTRY}" ]]; then
-        BUILD_ARGS+=(--build-arg "NPM_REGISTRY=${NPM_REGISTRY}")
-    fi
-elif [[ "${TOOL_KIND}" == "mini_swe" ]]; then
-    IMAGE_NAME="${TOOL_IMAGE:-mini-swe-agent-tool}"
-    if [[ -n "${PIP_INDEX_URL}" ]]; then
-        BUILD_ARGS+=(--build-arg PIP_INDEX_URL="${PIP_INDEX_URL}")
-    fi
-else
-    echo "Unknown tool: ${TOOL_KIND}; expected mini_swe or claude_code"
-    exit 1
-fi
-
-echo "==> Building ${TOOL_KIND} tool image: ${IMAGE_NAME}:${IMAGE_TAG}"
-docker build \
-    -f "${DOCKERFILE}" \
-    -t "${IMAGE_NAME}:${IMAGE_TAG}" \
-    "${BUILD_ARGS[@]}" \
-    "${SCRIPT_DIR}/"
-
-if [[ -n "${REGISTRY}" ]]; then
-    FULL_TAG="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
-    echo "==> Tagging and pushing: ${FULL_TAG}"
-    docker tag "${IMAGE_NAME}:${IMAGE_TAG}" "${FULL_TAG}"
-    docker push "${FULL_TAG}"
-    echo "    Pushed."
-fi
-
-echo ""
-echo "Tool image ready: ${IMAGE_NAME}:${IMAGE_TAG}"
-if [[ -n "${REGISTRY}" ]]; then
-    echo "  Remote sandbox: ${FULL_TAG}"
-fi
diff --git a/examples/blackbox_recipes/scripts/run_infer.sh b/examples/blackbox_recipes/scripts/run_infer.sh
deleted file mode 100755
index d5703aa6..00000000
--- a/examples/blackbox_recipes/scripts/run_infer.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env bash
-# Parallel inference for the blackbox SWE-agent recipe.
-#
-# Usage:
-#   bash examples/swe_agent_blackbox/scripts/run_infer.sh
-
-set -euo pipefail
-
-# ── Model & data ─────────────────────────────────────────────────────────
-MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3.5-9B}"
-DATA_PATH="${DATA_PATH:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
-
-# ── Inference parameters ─────────────────────────────────────────────────
-MAX_SAMPLES="${MAX_SAMPLES:--1}"
-PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
-RESPONSE_LENGTH="${RESPONSE_LENGTH:-65536}"
-TEMPERATURE="${TEMPERATURE:-1.0}"
-TOP_P="${TOP_P:-1.0}"
-N="${N:-8}"
-ENGINE="${ENGINE:-vllm}"
-TP="${TP:-4}"
-N_GPUS_PER_NODE="${N_GPUS_PER_NODE:-8}"
-GATEWAY_COUNT="${GATEWAY_COUNT:-1}"
-MAX_CONCURRENT_SESSIONS="${MAX_CONCURRENT_SESSIONS:-2}"
-
-# ── Agent parameters ─────────────────────────────────────────────────────
-RUNNER="${RUNNER:-uniagent}"
-AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
-export SWE_AGENT_MAX_TURNS="${SWE_AGENT_MAX_TURNS:-100}"
-export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
-SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-}"
-SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
-
-# ── Logging ──────────────────────────────────────────────────────────────
-export VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
-export ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.5}"
-
-echo "=== SWE-Agent Blackbox Inference ==="
-echo "Model: ${MODEL_PATH}"
-echo "Data:  ${DATA_PATH}"
-echo "Max samples: ${MAX_SAMPLES}"
-echo "Engine: ${ENGINE} (TP=${TP})"
-echo "Runner: ${RUNNER}"
-echo "Gateway count: ${GATEWAY_COUNT}"
-echo "Max concurrent sessions: ${MAX_CONCURRENT_SESSIONS}"
-echo "====================================="
-
-python examples/swe_agent_blackbox/parallel_infer.py \
-    --model-path "${MODEL_PATH}" \
-    --data-path "${DATA_PATH}" \
-    --max-samples "${MAX_SAMPLES}" \
-    --prompt-length "${PROMPT_LENGTH}" \
-    --response-length "${RESPONSE_LENGTH}" \
-    --temperature "${TEMPERATURE}" \
-    --top-p "${TOP_P}" \
-    --n "${N}" \
-    --engine "${ENGINE}" \
-    --tensor-parallel-size "${TP}" \
-    --max-turns "${SWE_AGENT_MAX_TURNS}" \
-    --runner "${RUNNER}" \
-    --agent-config-path "${AGENT_CONFIG_PATH}" \
-    --n-gpus-per-node "${N_GPUS_PER_NODE}" \
-    --gateway-count "${GATEWAY_COUNT}" \
-    --max-concurrent-sessions "${MAX_CONCURRENT_SESSIONS}" \
-    --tool-image "${SWE_AGENT_TOOL_IMAGE}" \
-    --run-timeout "${SWE_AGENT_RUN_TIMEOUT}"
diff --git a/examples/blackbox_recipes/scripts/run_train.sh b/examples/blackbox_recipes/scripts/run_train.sh
deleted file mode 100755
index cf08005d..00000000
--- a/examples/blackbox_recipes/scripts/run_train.sh
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env bash
-# Training launch script for the blackbox SWE-agent recipe.
-#
-# Uses GRPO + AgentFrameworkRolloutAdapter with reward computed in-process
-# by the agent runner, then passed through the reward worker's compute_score.
-#
-# Usage:
-#   bash examples/swe_agent_blackbox/scripts/run_train.sh
-#
-# All configurable via environment variables (see defaults below).
-
-set -euo pipefail
-
-# ── Model & data ─────────────────────────────────────────────────────────
-MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3-Coder-30B-A3B-Instruct}"
-TRAIN_DATA="${TRAIN_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
-VAL_DATA="${VAL_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
-
-# ── Hardware ─────────────────────────────────────────────────────────────
-NNODES="${NNODES:-1}"
-NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
-
-# ── Training parameters ─────────────────────────────────────────────────
-TRAIN_BATCH_SIZE="${TRAIN_BATCH_SIZE:-128}"
-PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
-RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
-ACTOR_LR="${ACTOR_LR:-1e-6}"
-TOTAL_EPOCHS="${TOTAL_EPOCHS:-10}"
-SAVE_FREQ="${SAVE_FREQ:-10}"
-TEST_FREQ="${TEST_FREQ:-10}"
-
-# ── Rollout parameters ──────────────────────────────────────────────────
-ENGINE="${ENGINE:-vllm}"
-TP="${TP:-4}"
-ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
-N="${N:-8}"
-TEMPERATURE="${TEMPERATURE:-1.0}"
-
-# ── Agent parameters ─────────────────────────────────────────────────────
-RUNNER="${RUNNER:-mini_swe}"
-MAX_TURNS="${MAX_TURNS:-100}"
-AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
-COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
-if [[ "${RUNNER}" == "claude_code" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
-elif [[ "${RUNNER}" == "mini_swe" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
-elif [[ "${RUNNER}" == "uniagent" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE=""
-else
-    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
-    exit 1
-fi
-SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
-RUNNER_ARGS=(
-    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
-)
-if [[ "${RUNNER}" != "uniagent" ]]; then
-    RUNNER_ARGS+=(
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
-    )
-fi
-
-# ── Logging ──────────────────────────────────────────────────────────────
-PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
-EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
-VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
-
-export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"
-export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
-export VERL_LOGGING_LEVEL
-
-# ── Environment for NCCL ─────────────────────────────────────────────────
-export NCCL_P2P_DISABLE="${NCCL_P2P_DISABLE:-1}"
-export NCCL_SHM_DISABLE="${NCCL_SHM_DISABLE:-1}"
-
-echo "=== SWE-Agent Blackbox Training ==="
-echo "Model:       ${MODEL_PATH}"
-echo "Train data:  ${TRAIN_DATA}"
-echo "Val data:    ${VAL_DATA}"
-echo "Engine:      ${ENGINE} (TP=${TP})"
-echo "Runner:      ${RUNNER}"
-echo "Batch size:  ${TRAIN_BATCH_SIZE}, N=${N}"
-echo "Epochs:      ${TOTAL_EPOCHS}"
-echo "====================================="
-
-python3 -m verl.trainer.main_ppo_sync \
-    --config-name=swe_agent_blackbox \
-    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
-    actor_rollout_ref.model.path="${MODEL_PATH}" \
-    data.train_files="['${TRAIN_DATA}']" \
-    data.val_files="['${VAL_DATA}']" \
-    data.train_batch_size=${TRAIN_BATCH_SIZE} \
-    data.max_prompt_length=${PROMPT_LENGTH} \
-    data.max_response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.name=${ENGINE} \
-    actor_rollout_ref.rollout.tensor_model_parallel_size=${TP} \
-    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
-    actor_rollout_ref.rollout.n=${N} \
-    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
-    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
-    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.max_model_len=$((PROMPT_LENGTH + RESPONSE_LENGTH + 1024)) \
-    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
-    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
-    actor_rollout_ref.rollout.nnodes=${NNODES} \
-    actor_rollout_ref.rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
-    trainer.nnodes=${NNODES} \
-    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
-    trainer.total_epochs=${TOTAL_EPOCHS} \
-    trainer.save_freq=${SAVE_FREQ} \
-    trainer.test_freq=${TEST_FREQ} \
-    trainer.project_name=${PROJECT_NAME} \
-    trainer.experiment_name=${EXPERIMENT_NAME} \
-    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
-    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
-    "${RUNNER_ARGS[@]}" \
-    "$@"
diff --git a/examples/blackbox_recipes/scripts/run_train_megatron_async.sh b/examples/blackbox_recipes/scripts/run_train_megatron_async.sh
deleted file mode 100755
index db3a8264..00000000
--- a/examples/blackbox_recipes/scripts/run_train_megatron_async.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/usr/bin/env bash
-# Megatron + TQ fully-async training for the blackbox SWE-agent recipe.
-#
-# Uses FullyAsyncAgentFrameworkRolloutAdapter + SWEAgentFramework with Megatron backend.
-# Data flows through TransferQueue (zero-copy) with ReplayBuffer flow control.
-#
-# Usage:
-#   bash examples/swe_agent_blackbox/scripts/run_train_megatron_async.sh
-#
-# All configurable via environment variables (see defaults below).
-
-set -euo pipefail
-
-# ── Model & data ─────────────────────────────────────────────────────────
-MODEL_PATH="${MODEL_PATH:-${HOME}/models/Qwen3.5-9B}"
-TRAIN_DATA="${TRAIN_DATA:-${HOME}/data/swe_agent/swe_rebench_filtered.parquet}"
-VAL_DATA="${VAL_DATA:-${HOME}/data/swe_agent/swe_bench_verified.parquet}"
-RUNTIME_ENV="${RUNTIME_ENV:-}"
-
-# ── Hardware ─────────────────────────────────────────────────────────────
-NNODES_TRAIN="${NNODES_TRAIN:-1}"
-NNODES_ROLLOUT="${NNODES_ROLLOUT:-1}"
-NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
-
-# ── Algorithm ────────────────────────────────────────────────────────────
-CLIP_RATIO_LOW="${CLIP_RATIO_LOW:-0.2}"
-CLIP_RATIO_HIGH="${CLIP_RATIO_HIGH:-0.28}"
-ACTOR_LR="${ACTOR_LR:-1e-6}"
-
-# ── Sequence lengths ─────────────────────────────────────────────────────
-PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
-RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
-MAX_MODEL_LEN=$((PROMPT_LENGTH + RESPONSE_LENGTH))
-
-# ── Rollout parameters ───────────────────────────────────────────────────
-ENGINE="${ENGINE:-vllm}"
-GEN_TP="${GEN_TP:-2}"
-N="${N:-8}"
-TEMPERATURE="${TEMPERATURE:-1.0}"
-ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
-
-# ── Megatron training parallelism ────────────────────────────────────────
-TRAIN_TP="${TRAIN_TP:-8}"
-TRAIN_PP="${TRAIN_PP:-1}"
-TRAIN_CP="${TRAIN_CP:-1}"
-OFFLOAD="${OFFLOAD:-True}"
-OPTIMIZER_OFFLOAD_FRACTION="${OFFLOAD_FRACTION:-1.0}"
-USE_MBRIDGE="${USE_MBRIDGE:-True}"
-PPO_MINI_BATCH_SIZE="${PPO_MINI_BATCH_SIZE:-16}"
-
-# ── Agent parameters ─────────────────────────────────────────────────────
-RUNNER="${RUNNER:-mini_swe}"
-MAX_TURNS="${MAX_TURNS:-100}"
-AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
-COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
-if [[ "${RUNNER}" == "claude_code" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
-elif [[ "${RUNNER}" == "mini_swe" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
-elif [[ "${RUNNER}" == "uniagent" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE=""
-else
-    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
-    exit 1
-fi
-SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
-CONDA_ENV="${CONDA_ENV:-testbed}"
-RUNNER_ARGS=(
-    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
-)
-if [[ "${RUNNER}" != "uniagent" ]]; then
-    RUNNER_ARGS+=(
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.conda_env=${CONDA_ENV}"
-    )
-fi
-
-# ── OpenYuanRong (YR remote sandbox) ─────────────────────────────────────
-OPENYUANRONG_SERVER_ADDRESS="${OPENYUANRONG_SERVER_ADDRESS:-}"
-OPENYUANRONG_TOKEN="${OPENYUANRONG_TOKEN:-}"
-OPENYUANRONG_TUNNEL_SSL_VERIFY="${OPENYUANRONG_TUNNEL_SSL_VERIFY:-0}"
-
-# ── Async training ───────────────────────────────────────────────────────
-TOTAL_ROLLOUT_STEPS="${TOTAL_ROLLOUT_STEPS:-100000}"
-STALENESS_THRESHOLD="${STALENESS_THRESHOLD:-1.0}"
-TRIGGER_SYNC_STEP="${TRIGGER_SYNC_STEP:-4}"
-PARTIAL_ROLLOUT="${PARTIAL_ROLLOUT:-True}"
-
-# ── Logging & checkpointing ──────────────────────────────────────────────
-PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
-EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
-SAVE_FREQ="${SAVE_FREQ:-10}"
-TEST_FREQ="${TEST_FREQ:-10}"
-CKPTS_DIR="${CKPTS_DIR:-checkpoints/${PROJECT_NAME}/${EXPERIMENT_NAME}}"
-
-export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"
-export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
-export OPENYUANRONG_SERVER_ADDRESS
-export OPENYUANRONG_TOKEN
-export OPENYUANRONG_TUNNEL_SSL_VERIFY
-
-echo "=== SWE-Agent Blackbox Megatron Async Training ==="
-echo "Model:       ${MODEL_PATH}"
-echo "Train data:  ${TRAIN_DATA}"
-echo "Val data:    ${VAL_DATA}"
-echo "Engine:      ${ENGINE} (gen_tp=${GEN_TP}, train_tp=${TRAIN_TP})"
-echo "Runner:      ${RUNNER}"
-echo "Batch:       n=${N}, mini_bsz=${PPO_MINI_BATCH_SIZE}"
-echo "Sequence:    prompt=${PROMPT_LENGTH}, response=${RESPONSE_LENGTH}"
-echo "Nodes:       train=${NNODES_TRAIN}, rollout=${NNODES_ROLLOUT}"
-echo "==================================================="
-
-# ── Compute derived parameters ───────────────────────────────────────────
-ACTOR_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
-INFER_PPO_MAX_TOKEN_LEN=$(( (PROMPT_LENGTH + RESPONSE_LENGTH) / TRAIN_CP ))
-
-RUNTIME_ENV_ARGS=()
-if [ -n "${RUNTIME_ENV}" ]; then
-    RUNTIME_ENV_ARGS=(--runtime-env "${RUNTIME_ENV}")
-fi
-
-# ── Ensure Ray is running ────────────────────────────────────────────────
-TOTAL_GPUS=$(( (NNODES_TRAIN + NNODES_ROLLOUT) * NGPUS_PER_NODE ))
-if ! ray status &>/dev/null; then
-    echo "Starting Ray cluster (${TOTAL_GPUS} GPUs)..."
-    ray start --head --num-gpus="${TOTAL_GPUS}" --disable-usage-stats
-else
-    echo "Ray cluster already running."
-fi
-
-# ── Launch ────────────────────────────────────────────────────────────────
-WORKING_DIR="${WORKING_DIR:-$(pwd)}"
-
-ray job submit --no-wait --working-dir="${WORKING_DIR}" "${RUNTIME_ENV_ARGS[@]}" \
-    -- python3 -m verl.experimental.fully_async_policy.fully_async_main \
-    --config-name=swe_agent_blackbox_megatron_async \
-    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
-    hydra.searchpath=[pkg://verl.trainer.config] \
-    actor_rollout_ref.model.path="${MODEL_PATH}" \
-    data.train_files="['${TRAIN_DATA}']" \
-    data.val_files="['${VAL_DATA}']" \
-    data.max_prompt_length=${PROMPT_LENGTH} \
-    data.max_response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.n=${N} \
-    actor_rollout_ref.rollout.name=${ENGINE} \
-    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
-    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.max_model_len=${MAX_MODEL_LEN} \
-    actor_rollout_ref.rollout.max_num_batched_tokens=${MAX_MODEL_LEN} \
-    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
-    actor_rollout_ref.rollout.tensor_model_parallel_size=${GEN_TP} \
-    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
-    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
-    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
-    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
-    "${RUNNER_ARGS[@]}" \
-    actor_rollout_ref.actor.clip_ratio_low=${CLIP_RATIO_LOW} \
-    actor_rollout_ref.actor.clip_ratio_high=${CLIP_RATIO_HIGH} \
-    actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} \
-    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${ACTOR_PPO_MAX_TOKEN_LEN} \
-    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
-    actor_rollout_ref.actor.optim.lr_decay_steps=${TOTAL_ROLLOUT_STEPS} \
-    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=${OPTIMIZER_OFFLOAD_FRACTION} \
-    +actor_rollout_ref.actor.optim.override_optimizer_config.overlap_cpu_optimizer_d2h_h2d=True \
-    +actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True \
-    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_cpu_offload=True \
-    actor_rollout_ref.actor.megatron.param_offload=${OFFLOAD} \
-    actor_rollout_ref.actor.megatron.grad_offload=${OFFLOAD} \
-    actor_rollout_ref.actor.megatron.optimizer_offload=${OFFLOAD} \
-    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TRAIN_TP} \
-    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
-    actor_rollout_ref.actor.megatron.context_parallel_size=${TRAIN_CP} \
-    actor_rollout_ref.actor.megatron.use_mbridge=${USE_MBRIDGE} \
-    actor_rollout_ref.ref.megatron.param_offload=${OFFLOAD} \
-    actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${TRAIN_TP} \
-    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
-    actor_rollout_ref.ref.megatron.context_parallel_size=${TRAIN_CP} \
-    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
-    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
-    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
-    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${INFER_PPO_MAX_TOKEN_LEN} \
-    trainer.project_name="${PROJECT_NAME}" \
-    trainer.experiment_name="${EXPERIMENT_NAME}" \
-    trainer.save_freq=${SAVE_FREQ} \
-    trainer.test_freq=${TEST_FREQ} \
-    trainer.default_local_dir="${CKPTS_DIR}" \
-    trainer.nnodes=${NNODES_TRAIN} \
-    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
-    rollout.nnodes=${NNODES_ROLLOUT} \
-    rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
-    rollout.total_rollout_steps=${TOTAL_ROLLOUT_STEPS} \
-    async_training.staleness_threshold=${STALENESS_THRESHOLD} \
-    async_training.trigger_parameter_sync_step=${TRIGGER_SYNC_STEP} \
-    async_training.partial_rollout=${PARTIAL_ROLLOUT} \
-    "$@"
diff --git a/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh b/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh
deleted file mode 100755
index 1a0c19d3..00000000
--- a/examples/blackbox_recipes/scripts/run_train_megatron_sync.sh
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/usr/bin/env bash
-# Megatron sync training for the blackbox SWE-agent recipe.
-#
-# Uses main_ppo_sync + Megatron backend with the same blackbox agent infrastructure
-# (AgentFrameworkRolloutAdapter, subprocess_runner, SWEAgentFramework).
-#
-# Usage:
-#   bash examples/swe_agent_blackbox/scripts/run_train_megatron_sync.sh
-#
-# All configurable via environment variables (see defaults below).
-
-set -euo pipefail
-
-# ── Model & data ─────────────────────────────────────────────────────────
-MODEL_PATH="${MODEL_PATH:-$HOME/models/Qwen3.5-9B}"
-TRAIN_DATA="${TRAIN_DATA:-$HOME/data/swe_agent/swe_rebench_filtered.parquet}"
-VAL_DATA="${VAL_DATA:-$HOME/data/swe_agent/swe_bench_verified.parquet}"
-
-# ── Hardware ─────────────────────────────────────────────────────────────
-NNODES="${NNODES:-1}"
-NGPUS_PER_NODE="${NGPUS_PER_NODE:-8}"
-
-# ── Training parameters ─────────────────────────────────────────────────
-TRAIN_BATCH_SIZE="${TRAIN_BATCH_SIZE:-128}"
-PROMPT_LENGTH="${PROMPT_LENGTH:-4096}"
-RESPONSE_LENGTH="${RESPONSE_LENGTH:-131072}"
-ACTOR_LR="${ACTOR_LR:-1e-6}"
-TOTAL_EPOCHS="${TOTAL_EPOCHS:-10}"
-SAVE_FREQ="${SAVE_FREQ:-10}"
-TEST_FREQ="${TEST_FREQ:-10}"
-PPO_MINI_BATCH_SIZE="${PPO_MINI_BATCH_SIZE:-16}"
-
-# ── Rollout parameters ──────────────────────────────────────────────────
-ENGINE="${ENGINE:-vllm}"
-TP="${TP:-4}"
-ROLLOUT_GPU_MEM_UTIL="${ROLLOUT_GPU_MEM_UTIL:-0.7}"
-N="${N:-8}"
-TEMPERATURE="${TEMPERATURE:-1.0}"
-
-# ── Megatron parallelism ────────────────────────────────────────────────
-TRAIN_TP="${TRAIN_TP:-8}"
-TRAIN_PP="${TRAIN_PP:-1}"
-TRAIN_CP="${TRAIN_CP:-1}"
-OFFLOAD="${OFFLOAD:-true}"
-USE_MBRIDGE="${USE_MBRIDGE:-true}"
-
-# ── Agent parameters ─────────────────────────────────────────────────────
-RUNNER="${RUNNER:-mini_swe}"
-MAX_TURNS="${MAX_TURNS:-100}"
-AGENT_CONFIG_PATH="${AGENT_CONFIG_PATH:-examples/swe_agent_blackbox/config/agent_config.yaml}"
-COMPLETION_TIMEOUT="${COMPLETION_TIMEOUT:-600}"
-if [[ "${RUNNER}" == "claude_code" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.claude_code_runner.claude_code_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-claude-code-tool:latest}"
-elif [[ "${RUNNER}" == "mini_swe" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.mini_swe_agent_runner.mini_swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE="${SWE_AGENT_TOOL_IMAGE:-swr.cn-east-3.myhuaweicloud.com/openyuanrong/mini-swe-agent-tool:latest}"
-elif [[ "${RUNNER}" == "uniagent" ]]; then
-    AGENT_RUNNER_FQN="examples.swe_agent_blackbox.agent_runner.swe_agent_runner"
-    SWE_AGENT_TOOL_IMAGE=""
-else
-    echo "Unknown RUNNER=${RUNNER}; expected mini_swe, claude_code, or uniagent" >&2
-    exit 1
-fi
-SWE_AGENT_RUN_TIMEOUT="${SWE_AGENT_RUN_TIMEOUT:-7200}"
-RUNNER_ARGS=(
-    "actor_rollout_ref.rollout.custom.agent_framework.agent_runner_fqn=${AGENT_RUNNER_FQN}"
-)
-if [[ "${RUNNER}" != "uniagent" ]]; then
-    RUNNER_ARGS+=(
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.tool_image=${SWE_AGENT_TOOL_IMAGE}"
-        "+actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.run_timeout=${SWE_AGENT_RUN_TIMEOUT}"
-    )
-fi
-
-# ── Logging ──────────────────────────────────────────────────────────────
-PROJECT_NAME="${PROJECT_NAME:-swe_agent_blackbox}"
-EXPERIMENT_NAME="${EXPERIMENT_NAME:-swe_agent_$(date +%Y%m%d_%H%M)}"
-VERL_LOGGING_LEVEL="${VERL_LOGGING_LEVEL:-INFO}"
-
-export SWE_AGENT_MAX_TURNS="${MAX_TURNS}"
-export SWE_AGENT_EVAL_TIMEOUT="${SWE_AGENT_EVAL_TIMEOUT:-600}"
-export VERL_LOGGING_LEVEL
-
-# ── Environment for NCCL ────────────────────────────────────────────────
-export NCCL_P2P_DISABLE="${NCCL_P2P_DISABLE:-1}"
-export NCCL_SHM_DISABLE="${NCCL_SHM_DISABLE:-1}"
-
-echo "=== SWE-Agent Blackbox Megatron Sync Training ==="
-echo "Model:       ${MODEL_PATH}"
-echo "Train data:  ${TRAIN_DATA}"
-echo "Val data:    ${VAL_DATA}"
-echo "Engine:      ${ENGINE} (gen_tp=${TP}, train_tp=${TRAIN_TP})"
-echo "Runner:      ${RUNNER}"
-echo "Batch size:  ${TRAIN_BATCH_SIZE}, N=${N}"
-echo "Sequence:    prompt=${PROMPT_LENGTH}, response=${RESPONSE_LENGTH}"
-echo "==============================================="
-
-python3 -m verl.trainer.main_ppo_sync \
-    --config-name=swe_agent_blackbox_megatron_sync \
-    --config-path="$(pwd)/examples/swe_agent_blackbox/config" \
-    hydra.searchpath=[pkg://verl.trainer.config] \
-    actor_rollout_ref.model.path="${MODEL_PATH}" \
-    data.train_files="['${TRAIN_DATA}']" \
-    data.val_files="['${VAL_DATA}']" \
-    data.train_batch_size=${TRAIN_BATCH_SIZE} \
-    data.max_prompt_length=${PROMPT_LENGTH} \
-    data.max_response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.name=${ENGINE} \
-    actor_rollout_ref.rollout.tensor_model_parallel_size=${TP} \
-    actor_rollout_ref.rollout.gpu_memory_utilization=${ROLLOUT_GPU_MEM_UTIL} \
-    actor_rollout_ref.rollout.n=${N} \
-    actor_rollout_ref.rollout.temperature=${TEMPERATURE} \
-    actor_rollout_ref.rollout.prompt_length=${PROMPT_LENGTH} \
-    actor_rollout_ref.rollout.response_length=${RESPONSE_LENGTH} \
-    actor_rollout_ref.rollout.max_model_len=$((PROMPT_LENGTH + RESPONSE_LENGTH)) \
-    actor_rollout_ref.rollout.multi_turn.max_assistant_turns=${MAX_TURNS} \
-    actor_rollout_ref.actor.optim.lr=${ACTOR_LR} \
-    actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} \
-    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TRAIN_TP} \
-    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${TRAIN_PP} \
-    actor_rollout_ref.actor.megatron.context_parallel_size=${TRAIN_CP} \
-    actor_rollout_ref.actor.megatron.param_offload=${OFFLOAD} \
-    actor_rollout_ref.actor.megatron.grad_offload=${OFFLOAD} \
-    actor_rollout_ref.actor.megatron.use_mbridge=${USE_MBRIDGE} \
-    actor_rollout_ref.rollout.nnodes=${NNODES} \
-    actor_rollout_ref.rollout.n_gpus_per_node=${NGPUS_PER_NODE} \
-    trainer.nnodes=${NNODES} \
-    trainer.n_gpus_per_node=${NGPUS_PER_NODE} \
-    trainer.total_epochs=${TOTAL_EPOCHS} \
-    trainer.save_freq=${SAVE_FREQ} \
-    trainer.test_freq=${TEST_FREQ} \
-    trainer.project_name=${PROJECT_NAME} \
-    trainer.experiment_name=${EXPERIMENT_NAME} \
-    actor_rollout_ref.rollout.custom.agent_framework.agent_runner_kwargs.agent_config_path="${AGENT_CONFIG_PATH}" \
-    actor_rollout_ref.rollout.custom.agent_framework.completion_timeout_seconds=${COMPLETION_TIMEOUT} \
-    "${RUNNER_ARGS[@]}" \
-    "$@"
diff --git a/examples/data_preprocess/r2e_gym_subset_filtered.py b/examples/data_preprocess/r2e_gym_subset_filtered.py
index c97afbdc..eeafbc0a 100644
--- a/examples/data_preprocess/r2e_gym_subset_filtered.py
+++ b/examples/data_preprocess/r2e_gym_subset_filtered.py
@@ -17,6 +17,13 @@ def get_image_name(dataset_id: str, instance_id: str) -> str:
         assert len(parts) == 2
         instance_number = parts[1].lower()
         return PUB_VOLCES_IMG_URL_R2E.format(instance_number=instance_number)
+elif impl == "openyuanrong":
+
+    def get_image_name(dataset_id: str, instance_id: str) -> str:
+        parts = instance_id.split("__")
+        assert len(parts) == 2
+        instance_number = parts[1].lower()
+        return f"swr.cn-east-3.myhuaweicloud.com/openyuanrong/r2e-gym-subset/{instance_number}:latest"
 else:
     raise ValueError(f"Invalid deployment implementation: {impl}")
 
diff --git a/examples/data_preprocess/swe_bench_verified.py b/examples/data_preprocess/swe_bench_verified.py
index 8f26ad16..3a56695e 100644
--- a/examples/data_preprocess/swe_bench_verified.py
+++ b/examples/data_preprocess/swe_bench_verified.py
@@ -18,6 +18,15 @@ def get_image_name(dataset_id: str, instance_id: str) -> str:
         project_name = parts[0].lower()
         instance_number = parts[1].lower()
         return f"swebench/sweb.eval.x86_64.{project_name}_1776_{instance_number}"
+elif impl == "openyuanrong":
+
+    def get_image_name(dataset_id: str, instance_id: str) -> str:
+        assert dataset_id == "swe-bench-verified"
+        parts = instance_id.split("__")
+        assert len(parts) == 2
+        project_name = parts[0].lower()
+        instance_number = parts[1].lower()
+        return f"swr.cn-east-3.myhuaweicloud.com/openyuanrong/swe-bench-verified/sweb.eval.x86_64.{project_name}_1776_{instance_number}:v2"
 else:
     raise ValueError(f"Invalid deployment implementation: {impl}")
 
diff --git a/examples/data_preprocess/swe_rebench.py b/examples/data_preprocess/swe_rebench.py
index 3add8b28..1cb907df 100644
--- a/examples/data_preprocess/swe_rebench.py
+++ b/examples/data_preprocess/swe_rebench.py
@@ -17,6 +17,14 @@ def get_image_name(dataset_id, instance_id):
         project_name = parts[0].lower()
         instance_number = parts[1].lower()
         return f"swerebench/sweb.eval.x86_64.{project_name}_1776_{instance_number}"
+elif impl == "openyuanrong":
+
+    def get_image_name(dataset_id: str, instance_id: str) -> str:
+        parts = instance_id.split("__")
+        assert len(parts) == 2
+        project_name = parts[0].lower()
+        instance_number = parts[1].lower()
+        return f"swr.cn-east-3.myhuaweicloud.com/openyuanrong/swe-rebench/{project_name}_1776_{instance_number}:latest"
 else:
     raise ValueError(f"Invalid deployment implementation: {impl}")
 
diff --git a/uni_agent/gateway/session/codec.py b/uni_agent/gateway/session/codec.py
index 747dbde8..b808183d 100644
--- a/uni_agent/gateway/session/codec.py
+++ b/uni_agent/gateway/session/codec.py
@@ -7,8 +7,8 @@
 from uuid import uuid4
 
 from verl.experimental.agent_loop.tool_parser import ToolParser
-from verl.utils.chat_template import apply_chat_template as _apply_chat_template
-from verl.utils.chat_template import initialize_system_prompt
+from verl.utils.tokenizer.chat_template import apply_chat_template as _apply_chat_template
+from verl.utils.tokenizer.chat_template import initialize_system_prompt
 from verl.utils.tokenizer import normalize_token_ids
 
 
diff --git a/uni_agent/interaction/model.py b/uni_agent/interaction/model.py
index bfa0651f..1184688f 100644
--- a/uni_agent/interaction/model.py
+++ b/uni_agent/interaction/model.py
@@ -138,7 +138,7 @@ async def query(
         return response_str, [], rollout_cache, generation_info
 
     async def _get_new_message_ids(self, new_messages: list[dict[str, Any]]) -> list[int]:
-        from verl.utils.chat_template import apply_chat_template
+        from verl.utils.tokenizer.chat_template import apply_chat_template
         from verl.utils.tokenizer import normalize_token_ids
 
         tokenized_prompt = await self.loop.run_in_executor(
@@ -154,7 +154,7 @@ async def _get_new_message_ids(self, new_messages: list[dict[str, Any]]) -> list
 
     @cached_property
     def message_boundary_tokens(self) -> list[int]:
-        from verl.utils.chat_template import apply_chat_template
+        from verl.utils.tokenizer.chat_template import apply_chat_template
         from verl.utils.tokenizer import normalize_token_ids
 
         dummy_history = [
diff --git a/verl b/verl
index 7aed6b23..6fef6a7a 160000
--- a/verl
+++ b/verl
@@ -1 +1 @@
-Subproject commit 7aed6b230776f963fa09509c10d9c3a767d1102c
+Subproject commit 6fef6a7a699435cad84e8907e9121457e41eed04