From b72c9e77584b72b18ebe3d01f1aea8f37a95571b Mon Sep 17 00:00:00 2001 From: weijt606 Date: Thu, 9 Apr 2026 08:11:48 +0200 Subject: [PATCH] feat: add Hermes Agent adapter (8th backend) - New adapter: hermes.py (hermes chat -q for non-interactive mode) - Register in ADAPTER_REGISTRY, config.py Literal, workspace.py AGENTS.md - Update cli.py: --agent choices, shell-hook preexec, display text - Update doctor.py: hermes recommendation - Update both READMEs: backend table, wrap examples, shell-hook, config, project structure, test badge (212 passing) - Add ForgeCode hyperlinks (README.md, README_CN.md, product-development.md) - Add 2 tests: test_hermes_command, test_hermes_custom_path --- README.md | 18 +++++++----- README_CN.md | 18 +++++++----- docs/development/product-development.md | 2 +- src/polyharness/cli.py | 12 ++++---- src/polyharness/config.py | 2 +- src/polyharness/doctor.py | 2 ++ src/polyharness/proposer/adapters/__init__.py | 3 ++ src/polyharness/proposer/adapters/hermes.py | 29 +++++++++++++++++++ src/polyharness/workspace.py | 1 + tests/test_cli_adapters.py | 18 +++++++++++- 10 files changed, 82 insertions(+), 23 deletions(-) create mode 100644 src/polyharness/proposer/adapters/hermes.py diff --git a/README.md b/README.md index b57cd4a..6ff6dc6 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) -[![Tests](https://img.shields.io/badge/tests-173%20passing-brightgreen.svg)]() +[![Tests](https://img.shields.io/badge/tests-212%20passing-brightgreen.svg)]() [![中文文档](https://img.shields.io/badge/文档-中文版-red.svg)](README_CN.md) --- @@ -30,7 +30,7 @@ Your AI agent runs the same harness every time. Same prompts, same tool config, | | | |---|---| | **Self-Evolution** | Iteratively searches over harness changes and keeps the full evaluation history in one workspace. | -| **7 Agent Backends** | Claude Code · Claw Code · Codex · OpenCode · API direct · OpenAI-compatible · Local — plug in any CLI agent. | +| **8 Agent Backends** | Claude Code · Claw Code · Codex · Hermes · OpenCode · API direct · OpenAI-compatible · Local — plug in any CLI agent. | | **Full History** | Every iteration's code, scores, and traces preserved. The Meta-Harness paper reports that non-Markovian search outperforms blind retries. | | **Search Tree** | Visualize the optimization path. Compare any two candidates with per-task diffs. | | **One-Command Setup** | `ph init --base-harness ... --task-dir ...` — copies files, configures workspace, done. | @@ -59,7 +59,7 @@ PolyHarness is the open-source engine for iteratively searching over an agent's It builds on ideas from the Meta-Harness paper and the TBench2 results reported there, while focusing this repository on the optimization workflow itself — how harness variants are proposed, evaluated, and revised over repeated runs. -If tools like ForgeCode help you code, PolyHarness helps you search for task-specific harness improvements by iterating on prompts, tool use, and harness logic. +If tools like [ForgeCode](https://github.com/antinomyhq/forgecode) help you code, PolyHarness helps you search for task-specific harness improvements by iterating on prompts, tool use, and harness logic. --- @@ -229,7 +229,7 @@ PolyHarness automatically sandboxes your agent inside this workspace, ensuring i | Scenario | How to configure | |----------|------------------| -| **Supported CLI Tools** | Run `ph init --agent `. PolyHarness auto-injects required instructions (e.g., `CLAUDE.md`).
*(Supported: claude-code, claw-code, codex, opencode)* | +| **Supported CLI Tools** | Run `ph init --agent `. PolyHarness auto-injects required instructions (e.g., `CLAUDE.md`).
*(Supported: claude-code, claw-code, codex, hermes, opencode)* | | **Anthropic API** | Run `ph init --agent api`. Set `export ANTHROPIC_API_KEY="sk-ant-..."` before `ph run`. | | **OpenAI / Local Models** | Run `ph init --agent openai`. Then configure the endpoint — see [Local Model Setup](#local-model-setup) below. | | **Custom CLI path** | If your CLI agent uses a non-standard command, edit `config.yaml` in the workspace before running:
`proposer: { cli_path: "npx @anthropic-ai/claude-code" }`| @@ -298,6 +298,7 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex +ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent ph wrap --auto-evolve opencode -p "Fix the flaky parser test" # OpenCode # Local models — wrap the CLI command directly @@ -367,10 +368,11 @@ After that, just use your agent as usual: claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ... claw -p "Write payment tests" # same — auto-wrapped codex "Add retry logic" # same +hermes chat -q "Refactor pool" # same opencode -p "Fix flaky test" # same ``` -How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged. +How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged. ```bash ph shell-hook status # check if installed @@ -459,12 +461,13 @@ The Proposer reads **all of this** before generating the next candidate. It can | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) | | `claw-code` | `claw -p` | Open-source Claw Code CLI | | `codex` | `codex --quiet` | OpenAI Codex CLI | +| `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI | | `opencode` | `opencode -p` | OpenCode CLI | | `local` | — | Offline rule-based engine for development & testing | `ph doctor` auto-detects all available backends and shows their status. -When you run `ph init --agent claude-code`, PolyHarness automatically generates a `CLAUDE.md` instruction file in the workspace, telling the agent how to behave as an optimization Proposer. Same for `CLAW.md`, `CODEX.md`, `OPENCODE.md` — each agent's native instruction format. +When you run `ph init --agent claude-code`, PolyHarness automatically generates a `CLAUDE.md` instruction file in the workspace, telling the agent how to behave as an optimization Proposer. Same for `CLAW.md`, `CODEX.md`, `AGENTS.md` (Hermes), `OPENCODE.md` — each agent's native instruction format. ### Local Model Setup @@ -517,7 +520,7 @@ search: parent_selection: best # Strategy: best | tournament | all proposer: - backend: api # api | openai | claude-code | claw-code | codex | opencode | local + backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local model: claude-sonnet-4-20250514 # Model name (for api/openai backends) base_url: null # Custom API endpoint (for openai backend) api_key: null # API key override (null = use env var) @@ -744,6 +747,7 @@ polyharness/ │ │ ├── claude_code.py # claude -p │ │ ├── claw_code.py # claw -p │ │ ├── codex.py # codex --quiet --auto-edit +│ │ ├── hermes.py # hermes chat -q │ │ └── opencode.py # opencode -p │ └── templates/ # 5 built-in task templates │ ├── text-classification/ diff --git a/README_CN.md b/README_CN.md index 24cb054..01b7bee 100644 --- a/README_CN.md +++ b/README_CN.md @@ -15,7 +15,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) -[![Tests](https://img.shields.io/badge/tests-173%20passing-brightgreen.svg)]() +[![Tests](https://img.shields.io/badge/tests-212%20passing-brightgreen.svg)]() [![English](https://img.shields.io/badge/Docs-English-blue.svg)](README.md) --- @@ -30,7 +30,7 @@ | | | |---|---| | **自动进化** | 通过迭代搜索探索 harness 变更,并把完整评估历史保存在同一个 workspace 中。 | -| **7 个 Agent 后端** | Claude Code · Claw Code · Codex · OpenCode · API 直连 · OpenAI 兼容 · Local,可接入任何 CLI agent。 | +| **8 个 Agent 后端** | Claude Code · Claw Code · Codex · Hermes · OpenCode · API 直连 · OpenAI 兼容 · Local,可接入任何 CLI agent。 | | **完整历史** | 每轮迭代的代码、分数、执行轨迹完整保留。Meta-Harness 论文报告非马尔可夫搜索优于盲目重试。 | | **搜索树** | 可视化优化路径,对比任意两个候选的逐任务差异。 | | **一条命令完成初始化** | `ph init --base-harness ... --task-dir ...`,复制文件、配置 workspace,一步完成。 | @@ -59,7 +59,7 @@ PolyHarness 是一个通过迭代评估与搜索来探索 agent harness 变体 它继承了 Meta-Harness 论文及其中 TBench2 结果所体现的核心思路,但这个仓库关注的是优化流程本身如何落地,也就是 harness 变体怎样在一轮轮评估、诊断和修改中被系统性迭代。 -如果说 ForgeCode 这类工具是在直接帮你写代码,那么 PolyHarness 更像是帮助你在自己的任务上持续试验 prompt、工具使用和 harness 逻辑配置的搜索层。 +如果说 [ForgeCode](https://github.com/antinomyhq/forgecode) 这类工具是在直接帮你写代码,那么 PolyHarness 更像是帮助你在自己的任务上持续试验 prompt、工具使用和 harness 逻辑配置的搜索层。 --- @@ -229,7 +229,7 @@ PolyHarness 会通过沙盒编排将你的 Agent 的工作目录(CWD)限制 | 使用场景 | 配置方法 | |----------|------------------| -| **受原生支持的 CLI Agent 工具** | 使用 `ph init --agent `。系统会自动注入其专属提示词指令(如 `CLAUDE.md`)。
*(支持: claude-code, claw-code, codex, opencode)* | +| **受原生支持的 CLI Agent 工具** | 使用 `ph init --agent `。系统会自动注入其专属提示词指令(如 `CLAUDE.md`)。
*(支持: claude-code, claw-code, codex, hermes, opencode)* | | **Anthropic API 直连** | 使用 `ph init --agent api`。在 `ph run` 前设置 `export ANTHROPIC_API_KEY="sk-ant-..."`。 | | **OpenAI / 本地模型** | 使用 `ph init --agent openai`。然后配置 endpoint——参见下方 [本地模型配置](#本地模型配置) 章节。 | | **CLI 命令被自定义 / 路径未响应** | 如果你的 CLI Agent 使用了非标命令(或未设置全局 PATH),请在初始化后手动修改 workspace 根目录下的 `config.yaml`:
`proposer: { cli_path: "npx @anthropic-ai/claude-code" }` | @@ -298,6 +298,7 @@ ph clean --keep-best # 清理候选目录释放磁盘空间 ph wrap --auto-evolve claude -p "把 auth 模块重构为 JWT 方案" # Claude Code ph wrap --auto-evolve claw -p "给支付服务写集成测试" # Claw Code ph wrap --auto-evolve codex "给 API 客户端加上重试逻辑" # Codex +ph wrap --auto-evolve hermes chat -q "重构数据库连接池" # Hermes Agent ph wrap --auto-evolve opencode -p "修复不稳定的 parser 测试" # OpenCode # 本地模型 —— 直接包裹 CLI 命令 @@ -367,10 +368,11 @@ ph shell-hook install # 一次性设置,写入 ~/.zshrc claude -p "把 auth 重构为 JWT" # 自动变为:ph wrap --auto-evolve claude -p ... claw -p "写支付测试" # 同理——自动包裹 codex "加重试逻辑" # 同理 +hermes chat -q "重构连接池" # 同理 opencode -p "修复不稳定测试" # 同理 ``` -原理:shell 的 `preexec` 钩子检测到 `claude`/`claw`/`codex`/`opencode` 命令后,透明地通过 `ph wrap --auto-evolve` 转发。你的输出不会变。 +原理:shell 的 `preexec` 钩子检测到 `claude`/`claw`/`codex`/`hermes`/`opencode` 命令后,透明地通过 `ph wrap --auto-evolve` 转发。你的输出不会变。 ```bash ph shell-hook status # 查看是否已安装 @@ -459,12 +461,13 @@ Proposer 在生成下一个候选之前会读取**所有这些信息**。它能 | `claude-code` | `claude -p` | 官方 Claude Code CLI(Pro/Teams 订阅) | | `claw-code` | `claw -p` | 开源 Claw Code CLI | | `codex` | `codex --quiet` | OpenAI Codex CLI | +| `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI | | `opencode` | `opencode -p` | OpenCode CLI | | `local` | — | 离线规则引擎,用于开发和测试 | `ph doctor` 会自动检测所有可用后端并显示状态。 -当你运行 `ph init --agent claude-code` 时,PolyHarness 会在 workspace 中自动生成 `CLAUDE.md` 指令文件,告诉 agent 如何作为优化 Proposer 工作。`CLAW.md`、`CODEX.md`、`OPENCODE.md` 也是同样的机制,每个 agent 都使用它自己的原生指令格式。 +当你运行 `ph init --agent claude-code` 时,PolyHarness 会在 workspace 中自动生成 `CLAUDE.md` 指令文件,告诉 agent 如何作为优化 Proposer 工作。`CLAW.md`、`CODEX.md`、`AGENTS.md`(Hermes)、`OPENCODE.md` 也是同样的机制,每个 agent 都使用它自己的原生指令格式。 ### 本地模型配置 @@ -517,7 +520,7 @@ search: parent_selection: best # 父候选选择策略: best | tournament | all proposer: - backend: api # api | openai | claude-code | claw-code | codex | opencode | local + backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local model: claude-sonnet-4-20250514 # 模型名称(api/openai 后端使用) base_url: null # 自定义 API 端点(openai 后端使用) api_key: null # API 密钥覆盖(null = 使用环境变量) @@ -744,6 +747,7 @@ polyharness/ │ │ ├── claude_code.py # claude -p │ │ ├── claw_code.py # claw -p │ │ ├── codex.py # codex --quiet --auto-edit +│ │ ├── hermes.py # hermes chat -q │ │ └── opencode.py # opencode -p │ └── templates/ # 5 个内置任务模板 │ ├── text-classification/ diff --git a/docs/development/product-development.md b/docs/development/product-development.md index 3c6facb..87d81c8 100644 --- a/docs/development/product-development.md +++ b/docs/development/product-development.md @@ -555,7 +555,7 @@ PolyHarness 是面向 AI agent 的开源优化引擎,用迭代搜索把 Meta-H **项目边界**: - 它优化的是现有 agent 的工作方式,而不是替代它们成为新的通用 coding agent。 -- 它更适合作为 Claude Code、Codex、ForgeCode 这类 agent 之上的搜索与改进引擎。 +- 它更适合作为 Claude Code、Codex、[ForgeCode](https://github.com/antinomyhq/forgecode) 这类 agent 之上的搜索与改进引擎。 - 它的价值在于把 prompt、工具配置、harness 逻辑和评估反馈连接成一个可重复运行的闭环。 ## 8. 与现有研究和生态的关系 diff --git a/src/polyharness/cli.py b/src/polyharness/cli.py index 4a97ccc..c364703 100644 --- a/src/polyharness/cli.py +++ b/src/polyharness/cli.py @@ -155,7 +155,7 @@ def new(project_dir: str): @click.option( "--agent", type=click.Choice( - ["claude-code", "claw-code", "codex", "opencode", "api", "openai", "local"], + ["claude-code", "claw-code", "codex", "hermes", "opencode", "api", "openai", "local"], case_sensitive=False, ), default="api", @@ -239,7 +239,7 @@ def init( @click.option( "--backend", type=click.Choice( - ["api", "openai", "claude-code", "claw-code", "codex", "opencode", "local"], + ["api", "openai", "claude-code", "claw-code", "codex", "hermes", "opencode", "local"], case_sensitive=False, ), default=None, @@ -1792,7 +1792,7 @@ def evolve(workspace: str, store: str | None, max_iterations: int | None): command -v ph >/dev/null 2>&1 || return local cmd="$1" case "$cmd" in - claude\ *|claw\ *|codex\ *|opencode\ *) + claude\ *|claw\ *|codex\ *|hermes\ *|opencode\ *) eval "ph wrap --auto-evolve $cmd" # Return non-zero to prevent original command from running (zsh preexec) return 1 @@ -1856,7 +1856,7 @@ def install(rc: str | None): """Install shell hook to auto-wrap agent commands. Adds a preexec hook to your shell rc file so that commands like - `claude -p ...`, `claw -p ...`, `codex ...`, `opencode -p ...` + `claude -p ...`, `claw -p ...`, `codex ...`, `hermes chat -q ...`, `opencode -p ...` are automatically wrapped with `ph wrap --auto-evolve`. """ rc_path = Path(rc) if rc else _detect_shell_rc() @@ -1873,7 +1873,7 @@ def install(rc: str | None): console.print(f"Run [bold]source {rc_path}[/bold] or open a new terminal to activate.") console.print() console.print("Agent commands that will be auto-wrapped:") - console.print(" claude, claw, codex, opencode") + console.print(" claude, claw, codex, hermes, opencode") console.print() console.print("To remove: [bold]ph shell-hook uninstall[/bold]") @@ -1918,7 +1918,7 @@ def hook_status(rc: str | None): if _hook_installed(rc_path): console.print(f"[green]Hook is installed in {rc_path}[/green]") - console.print("Auto-wrapped commands: claude, claw, codex, opencode") + console.print("Auto-wrapped commands: claude, claw, codex, hermes, opencode") else: console.print(f"[yellow]Hook is not installed[/yellow] ({rc_path})") console.print("Run [bold]ph shell-hook install[/bold] to set it up.") diff --git a/src/polyharness/config.py b/src/polyharness/config.py index 2244142..d2f47a0 100644 --- a/src/polyharness/config.py +++ b/src/polyharness/config.py @@ -24,7 +24,7 @@ class SearchConfig(BaseModel): class ProposerConfig(BaseModel): """Proposer agent configuration.""" - backend: Literal["api", "openai", "claude-code", "claw-code", "codex", "opencode", "local"] = Field( + backend: Literal["api", "openai", "claude-code", "claw-code", "codex", "hermes", "opencode", "local"] = Field( default="api", description="Proposer backend type." ) model: str = Field( diff --git a/src/polyharness/doctor.py b/src/polyharness/doctor.py index 808689e..4882bef 100644 --- a/src/polyharness/doctor.py +++ b/src/polyharness/doctor.py @@ -45,6 +45,8 @@ def run_doctor() -> None: click.echo("Recommended: claude-code (highest paper fidelity)") elif "claw-code" in available: click.echo("Recommended: claw-code (open-source, full tool support)") + elif "hermes" in available: + click.echo("Recommended: hermes (self-improving agent with skills)") elif "codex" in available: click.echo("Recommended: codex (OpenAI agent)") elif "opencode" in available: diff --git a/src/polyharness/proposer/adapters/__init__.py b/src/polyharness/proposer/adapters/__init__.py index ab09660..fd0d4a1 100644 --- a/src/polyharness/proposer/adapters/__init__.py +++ b/src/polyharness/proposer/adapters/__init__.py @@ -10,12 +10,14 @@ from polyharness.proposer.adapters.claude_code import ClaudeCodeAdapter from polyharness.proposer.adapters.claw_code import ClawCodeAdapter from polyharness.proposer.adapters.codex import CodexAdapter +from polyharness.proposer.adapters.hermes import HermesAdapter from polyharness.proposer.adapters.opencode import OpenCodeAdapter ADAPTER_REGISTRY: dict[str, type[CLIAdapter]] = { "claude-code": ClaudeCodeAdapter, "claw-code": ClawCodeAdapter, "codex": CodexAdapter, + "hermes": HermesAdapter, "opencode": OpenCodeAdapter, } @@ -38,6 +40,7 @@ def get_adapter(backend: str) -> CLIAdapter: "ClaudeCodeAdapter", "ClawCodeAdapter", "CodexAdapter", + "HermesAdapter", "OpenCodeAdapter", "ADAPTER_REGISTRY", "get_adapter", diff --git a/src/polyharness/proposer/adapters/hermes.py b/src/polyharness/proposer/adapters/hermes.py new file mode 100644 index 0000000..d62b601 --- /dev/null +++ b/src/polyharness/proposer/adapters/hermes.py @@ -0,0 +1,29 @@ +"""Hermes Agent CLI adapter. + +Invokes the Nous Research `hermes` CLI in single-query mode (-q). +""" + +from __future__ import annotations + +from polyharness.proposer.adapters.base import CLIAdapter + + +class HermesAdapter(CLIAdapter): + """Adapter for the Hermes Agent CLI (`hermes`).""" + + @property + def name(self) -> str: + return "hermes" + + @property + def default_binary(self) -> str: + return "hermes" + + def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]: + binary = cli_path or self.default_binary + return [ + binary, + "chat", + "-q", # single-query mode (non-interactive) + prompt, + ] diff --git a/src/polyharness/workspace.py b/src/polyharness/workspace.py index 20d8110..9cfcfb6 100644 --- a/src/polyharness/workspace.py +++ b/src/polyharness/workspace.py @@ -292,6 +292,7 @@ def _update_leaderboard(self) -> None: "claude-code": "CLAUDE.md", "claw-code": "CLAW.md", "codex": "CODEX.md", + "hermes": "AGENTS.md", "opencode": "OPENCODE.md", } diff --git a/tests/test_cli_adapters.py b/tests/test_cli_adapters.py index 8c0b993..401454e 100644 --- a/tests/test_cli_adapters.py +++ b/tests/test_cli_adapters.py @@ -14,6 +14,7 @@ CLIAdapter, CLIResult, CodexAdapter, + HermesAdapter, OpenCodeAdapter, get_adapter, ) @@ -24,7 +25,7 @@ # --------------------------------------------------------------------------- def test_registry_has_all_backends(): - assert set(ADAPTER_REGISTRY) == {"claude-code", "claw-code", "codex", "opencode"} + assert set(ADAPTER_REGISTRY) == {"claude-code", "claw-code", "codex", "hermes", "opencode"} def test_get_adapter_valid(): @@ -80,6 +81,21 @@ def test_opencode_command(): assert "optimize" in cmd +def test_hermes_command(): + adapter = HermesAdapter() + cmd = adapter.build_command("improve harness") + assert cmd[0] == "hermes" + assert "chat" in cmd + assert "-q" in cmd + assert "improve harness" in cmd + + +def test_hermes_custom_path(): + adapter = HermesAdapter() + cmd = adapter.build_command("x", cli_path="/usr/local/bin/hermes") + assert cmd[0] == "/usr/local/bin/hermes" + + # --------------------------------------------------------------------------- # Adapter — parse_output # ---------------------------------------------------------------------------