huggingface · rycerzes · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 13, 2026
diff --git a/docs/source/environments.md b/docs/source/environments.md
@@ -549,13 +549,13 @@ AgentWorldModel-1K — 1,000 synthetic MCP tool-use environments with 10,000 tas
 ```
 ````
 
-````{grid-item-card} Opencode
+````{grid-item-card} Coding Agent
 :class-card: sd-border-1
 
-`opencode_env` runs the OpenCode coding agent inside an isolated E2B sandbox against any OpenAI-compatible LLM endpoint, optionally capturing per-token logpr...
+`coding_agent_env` runs coding-agent harnesses (currently OpenCode + Pi) inside an isolated E2B sandbox against any OpenAI-compatible LLM endpoint, optionally capturing per-token logpr...
 
 +++
-```{button-link} environments/opencode.html
+```{button-link} environments/coding_agent.html
 :color: primary
 :outline:
 
@@ -633,5 +633,5 @@ environments/tbench2
 environments/unity
 environments/wildfire
 environments/agent_world_model
-environments/opencode
+environments/coding_agent
 ```
diff --git a/docs/source/environments/coding_agent.md b/docs/source/environments/coding_agent.md
@@ -0,0 +1,2 @@
+```{include} ../../../envs/coding_agent_env/README.md
+```
diff --git a/docs/source/environments/opencode.md b/docs/source/environments/opencode.md
diff --git a/envs/opencode_env/.dockerignore → envs/coding_agent_env/.dockerignore b/envs/opencode_env/.dockerignore → envs/coding_agent_env/.dockerignore
diff --git a/envs/opencode_env/.gitignore → envs/coding_agent_env/.gitignore b/envs/opencode_env/.gitignore → envs/coding_agent_env/.gitignore
diff --git a/envs/opencode_env/README.md → envs/coding_agent_env/README.md b/envs/opencode_env/README.md → envs/coding_agent_env/README.md
@@ -1,5 +1,5 @@
 ---
-title: OpenCode Environment Server
+title: Coding Agent Environment Server
 emoji: 🛠️
 colorFrom: indigo
 colorTo: purple
@@ -9,23 +9,24 @@ app_port: 8000
 base_path: /web
 tags:
   - openenv
-short_description: OpenCode coding agent in an E2B sandbox with logprob capture
+short_description: Multi-harness coding-agent env (OpenCode + Pi) in E2B
 ---
 
-# OpenCode Environment for OpenEnv
+# Coding Agent Environment for OpenEnv
 
-`opencode_env` runs the [OpenCode](https://opencode.ai) coding agent inside
-an isolated [E2B](https://e2b.dev) sandbox against any OpenAI-compatible
-LLM endpoint, optionally capturing per-token logprobs for GRPO training.
+`coding_agent_env` runs coding-agent harnesses (currently
+[OpenCode](https://opencode.ai) and [Pi](https://github.com/badlogic/pi-mono))
+inside an isolated [E2B](https://e2b.dev) sandbox against any OpenAI-compatible
+LLM endpoint, with optional trainer-owned interception for RL training.
 
-**🚀 Try it live**: [`AdithyaSK/opencode-env`](https://huggingface.co/spaces/AdithyaSK/opencode-env)
+**🚀 Try it live**: [`AdithyaSK/coding-agent-env`](https://huggingface.co/spaces/AdithyaSK/coding-agent-env)
 
 The deployed Space exposes:
 
-- **Web UI** at [`/web`](https://adithyask-opencode-env.hf.space/web) — pick endpoint, write task, hit Run, watch live phase log + reward + logprobs.
-- **MCP tool API** at [`/mcp`](https://adithyask-opencode-env.hf.space/mcp) — programmatic `run_rollout` calls.
-- **OpenAPI docs** at [`/docs`](https://adithyask-opencode-env.hf.space/docs).
-- **Health** at [`/health`](https://adithyask-opencode-env.hf.space/health).
+- **Web UI** at [`/web`](https://adithyask-coding-agent-env.hf.space/web) — pick endpoint, write task, hit Run, watch live phase log + reward.
+- **MCP tool API** at [`/mcp`](https://adithyask-coding-agent-env.hf.space/mcp) — programmatic `run_rollout` calls.
+- **OpenAPI docs** at [`/docs`](https://adithyask-coding-agent-env.hf.space/docs).
+- **Health** at [`/health`](https://adithyask-coding-agent-env.hf.space/health).
 
 The env is **task-agnostic** — every rollout is configured at call-time
 with a uniform Task shape:
@@ -47,20 +48,21 @@ a float to `/home/user/logs/verifier/reward.txt` (override).
 ```python
 import asyncio
 import os
-from opencode_env import OpenCodeEnv
-from opencode_env.client import _extract_text
-from opencode_env.models import RolloutResult
+from coding_agent_env import CodingAgentEnv
+from coding_agent_env.client import _extract_text
+from coding_agent_env.models import RolloutResult
 
 
 async def main():
-    SPACE = "https://adithyask-opencode-env.hf.space"
+    SPACE = "https://adithyask-coding-agent-env.hf.space"
 
-    async with OpenCodeEnv(base_url=SPACE) as env:
+    async with CodingAgentEnv(base_url=SPACE) as env:
         await env.reset()
 
         # The MCP tool returns JSON; deserialize via the typed model.
         raw = await env.call_tool(
             "run_rollout",
+            agent="opencode",                          # opencode | pi
             endpoint="openai",                          # vllm | openai | hf_router
             api_key=os.environ["OPENAI_API_KEY"],       # or set as a Space secret
             instruction=(
@@ -75,13 +77,12 @@ async def main():
                 "import binary_search; "
                 "assert binary_search.binary_search([1,2,3], 2) == 1; print('OK')\"",
             ],
-            template="opencode-rl",                     # prebaked E2B template
+            template="coding-agent-rl",                 # prebaked E2B template
             task_id="binary_search_v1",
         )
         result = RolloutResult.model_validate_json(_extract_text(raw))
 
         print("reward:", result.reward)
-        print("turns:", len(result.proxy_turns))
         print("files:", list(result.files.keys()))
         print("wall:", result.wall_s, "s")
 
@@ -93,7 +94,6 @@ Expected output (~20s with the prebaked template):
 
 ```
 reward: 1.0
-turns: 3
 files: ['/home/user/workdir/binary_search.py', ...]
 wall: 19.8 s
 ```
@@ -102,10 +102,10 @@ wall: 19.8 s
 
 ```python
 import os
-from opencode_env import OpenCodeEnv
+from coding_agent_env import CodingAgentEnv
 
 # .sync() returns a synchronous wrapper around the async client.
-with OpenCodeEnv(base_url="https://adithyask-opencode-env.hf.space").sync() as env:
+with CodingAgentEnv(base_url="https://adithyask-coding-agent-env.hf.space").sync() as env:
     env.reset()
     # MCP tools are reachable via env.call_tool(...) / env.step(...) sync-wrapped.
     # See the async example above for the full run_rollout signature.
@@ -120,23 +120,22 @@ For trainers that want to drive a sandbox directly without an HTTP boundary:
 
 ```python
 import os
-from opencode_env import (
-    OpenCodeConfig, OpenCodeSessionFactory, OpenCodeTask, E2BSandboxBackend,
+from coding_agent_env import (
+    CodingAgentConfig, CodingAgentSessionFactory, CodingAgentTask, E2BSandboxBackend,
 )
 
-factory = OpenCodeSessionFactory(
-    config=OpenCodeConfig(
+factory = CodingAgentSessionFactory(
+    config=CodingAgentConfig(
         provider="openai_compatible",
         base_url="https://api.openai.com/v1",
         api_key=os.environ["OPENAI_API_KEY"],
         model="gpt-4o-mini",
     ),
     sandbox_backend=E2BSandboxBackend(),
-    mode="transparent_proxy",                   # captures per-token logprobs
+    mode="interception_gate",                  # trainer-owned interception mode
 )
-session = factory.create(task=OpenCodeTask(instruction="..."))
+session = factory.create(task=CodingAgentTask(instruction="..."))
 session.wait_for_completion()
-turns = session.fetch_proxy_trace()             # per-turn (tokens, logprobs)
 session.close()
 ```
 
@@ -146,22 +145,22 @@ The Dockerfile lives at `server/Dockerfile`. Use the `openenv` CLI from
 the env root:
 
 ```bash
-cd envs/opencode_env
+cd envs/coding_agent_env
 
 openenv validate               # check pyproject.toml + openenv.yaml + server/app.py + uv.lock
-openenv build -t opencode-env  # builds the image (uses server/Dockerfile)
+openenv build -t coding-agent-env  # builds the image (uses server/Dockerfile)
 
 # run locally with E2B credentials
-docker run -p 8000:8000 -e E2B_API_KEY=e2b_... opencode-env
+docker run -p 8000:8000 -e E2B_API_KEY=e2b_... coding-agent-env
 
 # push to HF Spaces (Docker variant)
-openenv push --repo-id <user>/opencode-env
+openenv push --repo-id <user>/coding-agent-env
 ```
 
 Or build directly without the CLI:
 
 ```bash
-docker build -t opencode-env -f envs/opencode_env/server/Dockerfile envs/opencode_env
+docker build -t coding-agent-env -f envs/coding_agent_env/server/Dockerfile envs/coding_agent_env
 ```
 
 The image:
@@ -174,7 +173,7 @@ The image:
 
 ## The MCP Tool: `run_rollout`
 
-Single tool, two ways to specify the LLM endpoint:
+Single tool, with an `agent` selector plus two ways to specify the LLM endpoint:
 
 **Option A — endpoint shorthand (recommended)**: pass
 `endpoint="vllm"` (or `"openai"` / `"hf_router"`). The server resolves
@@ -186,29 +185,30 @@ directly.
 
 | Arg | Type | Default | Notes |
 |---|---|---|---|
+| `agent` | `str` | `"opencode"` | Harness to run: `"opencode"` or `"pi"`. |
 | `endpoint` | `str` | `""` | One of `"vllm"` / `"openai"` / `"hf_router"`. |
 | `base_url` / `api_key` / `model` | `str` | `""` | Override / supply explicitly. |
-| `instruction` | `str` | required | Prompt passed to `opencode run`. |
+| `instruction` | `str` | required | Prompt passed to the selected harness CLI. |
 | `setup` | `list[str]` | `[]` | Bash commands run **before** the agent. |
 | `verify` | `list[str]` | `[]` | Bash commands run **after** the agent. |
 | `task_id` | `str` | `""` | Echoed back in result. |
-| `mode` | `str` | `"transparent_proxy"` | Or `"black_box"` (no logprobs). |
+| `mode` | `str` | `"black_box"` | Or `"interception_gate"` for trainer-owned generation. |
 | `disable_thinking` | `bool \| None` | `None` (catalog default) | Inject `chat_template_kwargs.enable_thinking=false`. |
 | `max_tokens_cap` | `int` | `4096` | Per-turn `max_tokens` clamp. |
-| `top_logprobs` | `int` | `5` | HF Router cap is 5; OpenAI 0–20; vLLM unbounded. |
-| `agent_timeout_s` | `float` | `600.0` | Hard wall budget for opencode. |
-| `template` | `str` | `""` | E2B template name; `"opencode-rl"` skips ~2 min of install per rollout. |
+| `top_logprobs` | `int` | `5` | Reserved for trainer-owned interception workflows. |
+| `agent_timeout_s` | `float` | `600.0` | Hard wall budget for the selected harness. |
+| `template` | `str` | `""` | E2B template name; `"coding-agent-rl"` skips ~2 min of install per rollout. |
 
 Returns `RolloutResult` JSON with: `reward`, `setup_results[]`,
-`verify_results[]`, `proxy_turns[]`, `files{}`, `agent_log_tail`,
-`proxy_log_tail`, `wall_s`, `agent_exit_code`, `sandbox_id`, `error`.
+`verify_results[]`, `files{}`, `agent_log_tail`, `wall_s`,
+`agent_exit_code`, `sandbox_id`, `error`.
 
 ## Two Operating Modes
 
 | Mode | What it does | Best for |
 |---|---|---|
-| **`transparent_proxy`** (default) | In-sandbox proxy at `localhost:7000` forwards opencode's LLM calls to `base_url`, injects `logprobs=true`, captures per-turn `(messages, completion_tokens, logprobs)` to `proxy_trace.jsonl`. | GRPO / RL training, observability, top-k distillation. |
-| **`black_box`** | No proxy. opencode talks straight to `base_url`. | Smoke tests, eval, SFT data collection. |
+| **`black_box`** (default) | The selected harness talks directly to `base_url`. | Smoke tests, eval, SFT data collection. |
+| **`interception_gate`** | Agent calls are routed through trainer-host interception endpoints. Trainer owns forward pass + trajectory capture. | RL training with trainer-owned generation. |
 
 ## Environment Variables
 
@@ -227,68 +227,70 @@ sibling `.env` file; on HF Spaces, set them as **Space secrets**.
 | **OpenAI endpoint** | | |
 | `OPENAI_API_KEY` | required for `endpoint="openai"` | Standard OpenAI key. |
 | `OPENAI_BASE_URL` | no | Defaults to `https://api.openai.com/v1`. |
-| `OPENAI_MODEL` | no | Defaults to `gpt-4o-mini` (gpt-5.x and o-series refuse logprobs). |
+| `OPENAI_MODEL` | no | Defaults to `gpt-4o-mini`. |
 | **HF Router endpoint** | | |
 | `HF_ROUTER_API_KEY` | required for `endpoint="hf_router"` | HF user token. |
 | `HF_ROUTER_BASE_URL` | no | Defaults to `https://router.huggingface.co/v1`. |
 | `HF_ROUTER_MODEL` | no | Defaults to `Qwen/Qwen3-4B-Instruct-2507:nscale`. |
 
-Pick `provider:` suffixes that actually return logprobs:
-**Together / Nscale / Scaleway / SambaNova / Cerebras**. Avoid Novita /
-Hyperbolic / Featherless (silent drop) and Groq (HTTP 400).
 
 ## Pre-baked E2B Template
 
 The first rollout in a fresh E2B sandbox spends ~2 min installing
-opencode and the proxy's Python deps. Build a one-time template that
-ships those pre-installed:
+harness tooling. Build a one-time template that ships those pre-installed:
 
 ```bash
-.venv/bin/python envs/opencode_env/sandbox/build_template.py
-# → builds `opencode-rl` template in your E2B account (~1m20s, one-time)
+.venv/bin/python envs/coding_agent_env/sandbox/build_template.py
+# → builds `coding-agent-rl` template in your E2B account (~1m20s, one-time)
 ```
 
-After this, pass `template="opencode-rl"` on every `run_rollout` call —
+After this, pass `template="coding-agent-rl"` on every `run_rollout` call —
 each rollout drops to ~20–30s end-to-end.
 
 ## Project Structure
 
 ```
-opencode_env/
+coding_agent_env/
 ├── README.md                       # this file
 ├── openenv.yaml                    # OpenEnv space spec
 ├── pyproject.toml                  # deps + ``server`` entrypoint
 ├── uv.lock                         # frozen deps (required by ``openenv validate``)
 ├── .gitignore / .dockerignore      # excludes .env / __pycache__
 ├── __init__.py                     # re-exports primitive + client + models
 │
-├── client.py                       # OpenCodeEnv(MCPToolClient)
-├── models.py                       # RolloutResult / RolloutTurn / OpenCodeState
+├── client.py                       # CodingAgentEnv(MCPToolClient)
+├── models.py                       # RolloutResult / CodingAgentState
 │
-├── config.py                       # OpenCodeConfig (primitive)
-├── harness.py                      # OpenCodeSession / OpenCodeSessionFactory (CLI-only)
+├── config.py                       # CodingAgentConfig (primitive)
+├── harness.py                      # CodingAgentSession / CodingAgentSessionFactory (CLI-only)
 ├── opencode_runtime.py             # opencode.json builder + cmds
-├── task.py                         # OpenCodeTask
+├── task.py                         # CodingAgentTask
 │
 ├── server/
 │   ├── __init__.py
 │   ├── app.py                      # FastAPI factory; mounts Gradio at /web
-│   ├── opencode_environment.py     # MCPEnvironment with single ``run_rollout`` tool
+│   ├── coding_environment.py       # MCPEnvironment with single ``run_rollout`` tool
 │   ├── gradio_ui.py                # the /web Gradio Blocks UI
 │   ├── catalog.py                  # endpoint shorthand resolver
 │   └── Dockerfile                  # multi-stage uv build (used by ``openenv build``)
 │
 └── sandbox/
     ├── __init__.py
-    ├── base.py                     # SandboxBackend / SandboxHandle Protocols
-    ├── e2b.py                      # E2B implementation
-    ├── interception.py             # in-sandbox FastAPI proxy (logprob capture)
     └── build_template.py           # one-time E2B template builder
+
+# Shared sandbox runtime (moved to core):
+src/openenv/core/harness/sandbox/
+├── base.py                         # SandboxBackend / SandboxHandle protocols
+├── e2b_backend.py                  # E2B implementation
+├── docker_backend.py               # local Docker backend
+├── hf_backend.py                   # HF sandbox backend
+└── _util.py                        # shared sandbox shell utilities
 ```
 
 ## References
 
 - [OpenEnv docs](https://meta-pytorch.org/OpenEnv/)
 - [OpenCode CLI](https://opencode.ai/docs/cli/)
+- [Pi](https://github.com/badlogic/pi-mono)
 - [E2B Python SDK](https://e2b.dev/docs)
-- [HF Inference Providers logprob matrix](../../../DOCS/HF/hf_inference_providers_logprobs.md)
+
diff --git a/envs/coding_agent_env/__init__.py b/envs/coding_agent_env/__init__.py
@@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Coding-agent environment for OpenEnv.
+
+Two layers in this package:
+
+1. **Harness primitive** -- :class:`CodingAgentSessionFactory` /
+   :class:`CodingAgentSession` / :class:`CodingAgentConfig` /
+   :class:`E2BSandboxBackend`. Built on the generic
+   :class:`CLIAgentDriver` from ``openenv.core.harness.agents``.
+
+2. **Deployable env** -- :class:`CodingAgentEnv` (MCP client) talks to the
+   FastAPI server at ``server/app.py`` over HTTP. Use this when the
+   sandbox + agent live behind an HTTP boundary (e.g. an HF Space).
+   See ``client.py`` and ``server/``.
+"""
+
+from openenv.core.env_server.mcp_types import CallToolAction, ListToolsAction
+from openenv.core.harness.sandbox import SandboxBackend, SandboxHandle
+
+from .client import CodingAgentEnv
+from .config import CodingAgentConfig, Provider
+from .harness import CodingAgentSession, CodingAgentSessionFactory
+from .models import CommandResult, CodingAgentState, RolloutResult
+from .task import CodingAgentTask
+
+try:
+    from openenv.core.harness.sandbox import E2BSandboxBackend
+except ImportError:  # e2b not installed
+    E2BSandboxBackend = None  # type: ignore[assignment,misc]
+
+__all__ = [
+    # Deployed-env client
+    "CodingAgentEnv",
+    "CallToolAction",
+    "ListToolsAction",
+    # HTTP API models
+    "CommandResult",
+    "CodingAgentState",
+    "RolloutResult",
+    # Harness primitive
+    "CodingAgentConfig",
+    "CodingAgentSession",
+    "CodingAgentSessionFactory",
+    "CodingAgentTask",
+    "Provider",
+    # Sandbox backend
+    "E2BSandboxBackend",
+    "SandboxBackend",
+    "SandboxHandle",
+]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		```{include} ../../../envs/coding_agent_env/README.md
		```