weijt606 · weijt606 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,29 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [0.2.3] - 2026-05-26
+
+### Fixed
+- **Codex adapter** — switched to `codex exec` headless mode; the old
+  `codex --quiet --auto-edit` invocation was removed upstream. Also added
+  `--skip-git-repo-check` (the optimization workspace isn't a git repo) and
+  `--sandbox workspace-write` (lets the agent edit within the workspace).
+- **OpenCode adapter** — switched to the `opencode run` subcommand; the
+  top-level `-p` flag is no longer supported upstream.
+- **Claude Code adapter** — added `--permission-mode acceptEdits` so the agent
+  can actually write candidate files in headless `-p` mode (recent Claude Code
+  versions block edits without it); dropped `--verbose` (noise in print mode).
+
+### Changed
+- **Claude Code adapter** now pins `--model claude-opus-4-7` (Opus 4.7,
+  highest-capability) for the Proposer.
+- Default proposer model `claude-sonnet-4-20250514` → `claude-sonnet-4-6`
+  (affects the `api`/`openai` backends only; CLI backends use their own model).
+- Verified that the `claude-code` (`claude -p`) and `hermes` (`hermes chat -q`)
+  invocations are still current; `claw-code` is assumed to mirror Claude Code and was not verified (low usage).
+- Docs (README / README_CN / technical-architecture) updated to the current CLI
+  invocations.
+
 ## [0.2.2] - 2026-05-24
 
 ### Added

diff --git a/README.md b/README.md
@@ -303,9 +303,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
 # CLI agent backends — wrap the agent you already use
 ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT"   # Claude Code
 ph wrap --auto-evolve claw -p "Write integration tests for payments"     # Claw Code
-ph wrap --auto-evolve codex "Add retry logic to the API client"          # Codex
+ph wrap --auto-evolve codex exec "Add retry logic to the API client"     # Codex
 ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool"   # Hermes Agent
-ph wrap --auto-evolve opencode -p "Fix the flaky parser test"            # OpenCode
+ph wrap --auto-evolve opencode run "Fix the flaky parser test"           # OpenCode
 
 # Local models — wrap the CLI command directly
 ph wrap --auto-evolve ollama run gemma3 "Summarize this document"         # Ollama
@@ -373,9 +373,9 @@ After that, just use your agent as usual:
 ```bash
 claude -p "Refactor auth to JWT"        # automatically becomes: ph wrap --auto-evolve claude -p ...
 claw -p "Write payment tests"            # same — auto-wrapped
-codex "Add retry logic"                  # same
+codex exec "Add retry logic"             # same
 hermes chat -q "Refactor pool"           # same
-opencode -p "Fix flaky test"             # same
+opencode run "Fix flaky test"            # same
 ```
 
 How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -466,9 +466,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
 | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
 | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
 | `claw-code` | `claw -p` | Open-source Claw Code CLI |
-| `codex` | `codex --quiet` | OpenAI Codex CLI |
+| `codex` | `codex exec` | OpenAI Codex CLI |
 | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
-| `opencode` | `opencode -p` | OpenCode CLI |
+| `opencode` | `opencode run` | OpenCode CLI |
 | `local` | — | Offline rule-based engine for development & testing |
 
 `ph doctor` auto-detects all available backends and shows their status.
@@ -543,7 +543,7 @@ proposer:
   backend: api                 # api | openai | claude-code | claw-code | codex | hermes | opencode | local
   ensemble: []                 # If non-empty, pick among these backends per iteration via a UCB bandit
   bandit_c: 1.41421356         # UCB exploration constant (higher = more exploration)
-  model: claude-sonnet-4-20250514  # Model name (for api/openai backends)
+  model: claude-sonnet-4-6     # Model name (for api/openai backends)
   base_url: null               # Custom API endpoint (for openai backend)
   api_key: null                # API key override (null = use env var)
   max_tokens: 16384            # Max output tokens per proposer turn
@@ -772,9 +772,9 @@ polyharness/
 │   │   └── adapters/            # Per-agent CLI adapters
 │   │       ├── claude_code.py   # claude -p
 │   │       ├── claw_code.py     # claw -p
-│   │       ├── codex.py         # codex --quiet --auto-edit
+│   │       ├── codex.py         # codex exec
 │   │       ├── hermes.py        # hermes chat -q
-│   │       └── opencode.py      # opencode -p
+│   │       └── opencode.py      # opencode run
 │   └── templates/               # 5 built-in task templates
 │       ├── text-classification/
 │       ├── math-word-problems/

diff --git a/README_CN.md b/README_CN.md
@@ -303,9 +303,9 @@ ph clean --keep-best           # 清理候选目录释放磁盘空间
 # CLI agent 后端 —— 直接包裹你已经在用的 agent
 ph wrap --auto-evolve claude -p "把 auth 模块重构为 JWT 方案"      # Claude Code
 ph wrap --auto-evolve claw -p "给支付服务写集成测试"            # Claw Code
-ph wrap --auto-evolve codex "给 API 客户端加上重试逻辑"              # Codex
+ph wrap --auto-evolve codex exec "给 API 客户端加上重试逻辑"              # Codex
 ph wrap --auto-evolve hermes chat -q "重构数据库连接池"              # Hermes Agent
-ph wrap --auto-evolve opencode -p "修复不稳定的 parser 测试"       # OpenCode
+ph wrap --auto-evolve opencode run "修复不稳定的 parser 测试"       # OpenCode
 
 # 本地模型 —— 直接包裹 CLI 命令
 ph wrap --auto-evolve ollama run gemma3 "总结这篇文档"                # Ollama
@@ -373,9 +373,9 @@ ph shell-hook install          # 一次性设置，写入 ~/.zshrc
 ```bash
 claude -p "把 auth 重构为 JWT"            # 自动变为：ph wrap --auto-evolve claude -p ...
 claw -p "写支付测试"                  # 同理——自动包裹
-codex "加重试逻辑"                     # 同理
+codex exec "加重试逻辑"                     # 同理
 hermes chat -q "重构连接池"            # 同理
-opencode -p "修复不稳定测试"            # 同理
+opencode run "修复不稳定测试"            # 同理
 ```
 
 原理：shell 的 `preexec` 钩子检测到 `claude`/`claw`/`codex`/`hermes`/`opencode` 命令后，透明地通过 `ph wrap --auto-evolve` 转发。你的输出不会变。
@@ -466,9 +466,9 @@ Proposer 在生成下一个候选之前会读取**所有这些信息**。它能
 | `openai` | — | 兼容 OpenAI 格式的本地/云端模型直连 (Ollama, vLLM, LM Studio 等)，需配置 `OPENAI_API_KEY` |
 | `claude-code` | `claude -p` | 官方 Claude Code CLI（Pro/Teams 订阅） |
 | `claw-code` | `claw -p` | 开源 Claw Code CLI |
-| `codex` | `codex --quiet` | OpenAI Codex CLI |
+| `codex` | `codex exec` | OpenAI Codex CLI |
 | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
-| `opencode` | `opencode -p` | OpenCode CLI |
+| `opencode` | `opencode run` | OpenCode CLI |
 | `local` | — | 离线规则引擎，用于开发和测试 |
 
 `ph doctor` 会自动检测所有可用后端并显示状态。
@@ -543,7 +543,7 @@ proposer:
   backend: api                 # api | openai | claude-code | claw-code | codex | hermes | opencode | local
   ensemble: []                 # 非空时，每轮用 UCB bandit 在这些后端中择优
   bandit_c: 1.41421356         # UCB 探索常数（越大越偏探索）
-  model: claude-sonnet-4-20250514  # 模型名称（api/openai 后端使用）
+  model: claude-sonnet-4-6     # 模型名称（api/openai 后端使用）
   base_url: null               # 自定义 API 端点（openai 后端使用）
   api_key: null                # API 密钥覆盖（null = 使用环境变量）
   max_tokens: 16384            # 每轮 proposer 最大输出 token 数
@@ -772,9 +772,9 @@ polyharness/
 │   │   └── adapters/            # 逐 agent CLI 适配器
 │   │       ├── claude_code.py   # claude -p
 │   │       ├── claw_code.py     # claw -p
-│   │       ├── codex.py         # codex --quiet --auto-edit
+│   │       ├── codex.py         # codex exec
 │   │       ├── hermes.py        # hermes chat -q
-│   │       └── opencode.py      # opencode -p
+│   │       └── opencode.py      # opencode run
 │   └── templates/               # 5 个内置任务模板
 │       ├── text-classification/
 │       ├── math-word-problems/

diff --git a/docs/development/technical-architecture.md b/docs/development/technical-architecture.md
@@ -159,7 +159,7 @@ search:
 
 # Proposer 配置
 proposer:
-  model: "claude-sonnet-4-20250514"     # Proposer 模型
+  model: "claude-sonnet-4-6"   # Proposer 模型
   max_tokens: 16384            # 单次输出上限
   temperature: 0.7             # 生成温度
   backend: "api"               # api | claude-code | claw-code
@@ -222,7 +222,7 @@ harness:
 {
   "iteration": 3,
   "parent": "iter_1",
-  "proposer_model": "claude-sonnet-4-20250514",
+  "proposer_model": "claude-sonnet-4-6",
   "proposer_reasoning": "iter_1 在 task_002 上失败因为缺少 retry 逻辑...",
   "changes_summary": "添加了 exponential backoff retry 到 API 调用层",
   "timestamp": "2026-04-02T14:30:00Z"
@@ -1254,7 +1254,7 @@ ablation:
 ```yaml
 proposer:
   model: "claude-haiku-4-20250414"    # 低成本模式
-  # model: "claude-sonnet-4-20250514"  # 平衡模式
+  # model: "claude-sonnet-4-6"  # 平衡模式
   # model: "claude-opus-4-20250514"    # 最强模式（论文设置）
 ```
 

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "polyharness",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "Make your AI agent evolve automatically through iterative harness optimization.",
   "keywords": [
     "agent",

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "polyharness"
-version = "0.2.2"
+version = "0.2.3"
 description = "Automated harness optimization for AI agents — make your agent evolve."
 readme = "README.md"
 license = "MIT"

diff --git a/src/polyharness/__init__.py b/src/polyharness/__init__.py
@@ -1,3 +1,3 @@
 """PolyHarness — Automated harness optimization for AI agents."""
 
-__version__ = "0.2.2"
+__version__ = "0.2.3"
diff --git a/src/polyharness/config.py b/src/polyharness/config.py
@@ -84,7 +84,8 @@ class ProposerConfig(BaseModel):
         description="UCB exploration constant for ensemble selection. Higher = more exploration.",
     )
     model: str = Field(
-        default="claude-sonnet-4-20250514", description="Model for the Proposer agent."
+        default="claude-sonnet-4-6",
+        description="Model for the Proposer agent (api/openai backends; CLI backends use their own).",
     )
     base_url: str | None = Field(
         default=None, description="Optional base URL for the API (useful for local models)."

diff --git a/src/polyharness/proposer/adapters/claude_code.py b/src/polyharness/proposer/adapters/claude_code.py
@@ -2,12 +2,23 @@
 
 Invokes the official `claude` CLI in print mode (-p).
 Requires an active Claude Code subscription.
+
+Verified against Claude Code (May 2026):
+- `-p` headless mode and `--output-format text` are current.
+- `--permission-mode acceptEdits` is REQUIRED for the agent to write files
+  non-interactively (auto-approves Read/Edit/Write); without it, headless edits
+  are blocked. `acceptEdits` still gates arbitrary Bash/network (least-privilege,
+  appropriate for the isolated workspace).
+- `--model claude-opus-4-7` pins to Opus 4.7 (full name for reproducibility).
 """
 
 from __future__ import annotations
 
 from polyharness.proposer.adapters.base import CLIAdapter
 
+# Pinned Proposer model for the Claude Code backend (highest-capability).
+CLAUDE_CODE_MODEL = "claude-opus-4-7"
+
 
 class ClaudeCodeAdapter(CLIAdapter):
     """Adapter for the Claude Code CLI (`claude`)."""
@@ -24,8 +35,9 @@ def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str
         binary = cli_path or self.default_binary
         return [
             binary,
-            "-p",                # print mode (non-interactive, stdout output)
+            "-p",                                # print mode (non-interactive)
             prompt,
+            "--model", CLAUDE_CODE_MODEL,        # pin to Opus 4.7
+            "--permission-mode", "acceptEdits",  # auto-approve file edits (headless)
             "--output-format", "text",
-            "--verbose",
         ]
diff --git a/src/polyharness/proposer/adapters/codex.py b/src/polyharness/proposer/adapters/codex.py
@@ -1,6 +1,8 @@
 """Codex CLI adapter.
 
-Invokes OpenAI's `codex` CLI agent in quiet/non-interactive mode.
+Invokes OpenAI's `codex` CLI agent in headless/non-interactive mode via
+`codex exec` (the old `--quiet`/`--auto-edit` flags were removed upstream).
+See: developers.openai.com/codex/noninteractive
 """
 
 from __future__ import annotations
@@ -23,7 +25,8 @@ def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str
         binary = cli_path or self.default_binary
         return [
             binary,
-            "--quiet",
-            "--auto-edit",       # allow file edits without confirmation
+            "exec",                          # headless, non-interactive mode
+            "--skip-git-repo-check",         # the workspace is not a git repo
+            "--sandbox", "workspace-write",  # allow edits within the workspace cwd
             prompt,
         ]
diff --git a/src/polyharness/proposer/adapters/opencode.py b/src/polyharness/proposer/adapters/opencode.py
@@ -1,6 +1,8 @@
 """OpenCode CLI adapter.
 
-Invokes the open-source `opencode` CLI agent.
+Invokes the open-source `opencode` CLI agent in non-interactive mode via the
+`run` subcommand (the old top-level `-p` flag is no longer supported upstream).
+See: opencode.ai/docs/cli
 """
 
 from __future__ import annotations
@@ -23,6 +25,6 @@ def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str
         binary = cli_path or self.default_binary
         return [
             binary,
-            "-p",                # prompt mode
+            "run",               # non-interactive mode (replaces old -p)
             prompt,
         ]
diff --git a/src/polyharness/proposer/api_proposer.py b/src/polyharness/proposer/api_proposer.py
@@ -123,7 +123,7 @@ class APIProposer(BaseProposer):
 
     def __init__(
         self,
-        model: str = "claude-sonnet-4-20250514",
+        model: str = "claude-sonnet-4-6",
         max_tokens: int = 16384,
         temperature: float = 0.7,
     ):

diff --git a/tests/test_cli_adapters.py b/tests/test_cli_adapters.py
@@ -50,6 +50,14 @@ def test_claude_code_command():
     assert cmd[0] == "claude"
     assert "-p" in cmd
     assert "do stuff" in cmd
+    # Pinned to Opus 4.7
+    assert "--model" in cmd
+    assert "claude-opus-4-7" in cmd
+    # Headless edits must be auto-approved or the agent can't write candidates
+    assert "--permission-mode" in cmd
+    assert "acceptEdits" in cmd
+    # --verbose is noise in print mode; should be gone
+    assert "--verbose" not in cmd
 
 
 def test_claude_code_custom_path():
@@ -70,14 +78,18 @@ def test_codex_command():
     adapter = CodexAdapter()
     cmd = adapter.build_command("fix it")
     assert cmd[0] == "codex"
-    assert "--quiet" in cmd
+    assert "exec" in cmd                    # headless mode (replaces old --quiet)
+    assert "--skip-git-repo-check" in cmd   # workspace isn't a git repo
+    assert "--quiet" not in cmd             # removed upstream
     assert "fix it" in cmd
 
 
 def test_opencode_command():
     adapter = OpenCodeAdapter()
     cmd = adapter.build_command("optimize")
     assert cmd[0] == "opencode"
+    assert "run" in cmd          # non-interactive subcommand (replaces old -p)
+    assert "-p" not in cmd       # no longer supported upstream
     assert "optimize" in cmd