From 9fdfeb7cbf4c620a0d2645db80233ff2a47e8cff Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 12:57:53 -0700 Subject: [PATCH 01/11] feat(claude-cli): add local Claude Code CLI provider bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spawn the local `claude` binary as a subprocess and expose it as an Anthropic Messages-compatible provider. Hosted in brightstaff (`CLAUDE_CLI_LISTEN_ADDR`), with session reuse, idle TTL, and watchdog. User-facing surface is `model_providers: [{ model: claude-cli/* }]` — the Python CLI auto-fills name/provider_interface/base_url/access_key and the launcher (native + supervisord) enables the bridge listener only when at least one claude-cli provider is present. --- cli/planoai/config_generator.py | 59 ++ cli/planoai/native_runner.py | 60 ++ cli/test/test_config_generator.py | 66 +- cli/test/test_native_runner_claude_cli.py | 112 ++ config/plano_config_schema.yaml | 2 + config/supervisord.conf | 8 + .../src/handlers/claude_cli/mod.rs | 22 + .../src/handlers/claude_cli/process.rs | 330 ++++++ .../src/handlers/claude_cli/server.rs | 335 ++++++ .../src/handlers/claude_cli/session.rs | 341 +++++++ crates/brightstaff/src/handlers/mod.rs | 1 + crates/brightstaff/src/main.rs | 83 +- crates/brightstaff/tests/claude_cli_bridge.rs | 190 ++++ .../brightstaff/tests/fixtures/fake_claude.sh | 26 + crates/common/src/configuration.rs | 6 + crates/hermesllm/src/apis/claude_cli.rs | 955 ++++++++++++++++++ crates/hermesllm/src/apis/mod.rs | 1 + crates/hermesllm/src/bin/provider_models.yaml | 13 + crates/hermesllm/src/providers/id.rs | 17 + crates/hermesllm/tests/claude_cli_fixtures.rs | 114 +++ .../fixtures/claude_cli/error_response.ndjson | 3 + .../claude_cli/retry_then_success.ndjson | 10 + .../fixtures/claude_cli/text_response.ndjson | 10 + .../claude_cli/tool_use_response.ndjson | 9 + demos/integrations/claude_cli/README.md | 49 + demos/integrations/claude_cli/config.yaml | 27 + 26 files 
changed, 2847 insertions(+), 2 deletions(-) create mode 100644 cli/test/test_native_runner_claude_cli.py create mode 100644 crates/brightstaff/src/handlers/claude_cli/mod.rs create mode 100644 crates/brightstaff/src/handlers/claude_cli/process.rs create mode 100644 crates/brightstaff/src/handlers/claude_cli/server.rs create mode 100644 crates/brightstaff/src/handlers/claude_cli/session.rs create mode 100644 crates/brightstaff/tests/claude_cli_bridge.rs create mode 100755 crates/brightstaff/tests/fixtures/fake_claude.sh create mode 100644 crates/hermesllm/src/apis/claude_cli.rs create mode 100644 crates/hermesllm/tests/claude_cli_fixtures.rs create mode 100644 crates/hermesllm/tests/fixtures/claude_cli/error_response.ndjson create mode 100644 crates/hermesllm/tests/fixtures/claude_cli/retry_then_success.ndjson create mode 100644 crates/hermesllm/tests/fixtures/claude_cli/text_response.ndjson create mode 100644 crates/hermesllm/tests/fixtures/claude_cli/tool_use_response.ndjson create mode 100644 demos/integrations/claude_cli/README.md create mode 100644 demos/integrations/claude_cli/config.yaml diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index cb07767e0..273e5061b 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -39,11 +39,64 @@ CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs" CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown" +# Local-only bridge that runs Claude Code CLI as a subprocess. Hosted by +# brightstaff on this loopback address; the Python CLI auto-fills the matching +# provider fields below and tells the launcher to enable the bridge. 
CLAUDE_CLI_DEFAULT_BASE_URL = "http://127.0.0.1:14001"
CLAUDE_CLI_DEFAULT_LISTEN_ADDR = "127.0.0.1:14001"
CLAUDE_CLI_DEFAULT_NAME = "claude-cli/*"
CLAUDE_CLI_DEFAULT_ACCESS_KEY_PLACEHOLDER = "claude-cli-local"

# Unchanged diff context from cli/planoai/config_generator.py (kept for
# reference only; the names it uses are defined elsewhere in that module):
#   SUPPORTED_PROVIDERS = (
#       SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
#   )


def _is_claude_cli_provider(model_provider):
    """Return True iff this provider entry refers to the local claude-cli bridge.

    An entry matches when its `model` or `name` starts with `claude-cli/`, or
    when its `provider_interface` is exactly `claude-cli`. Missing or empty
    fields count as empty strings and surrounding whitespace is ignored.
    """
    model = (model_provider.get("model") or "").strip()
    name = (model_provider.get("name") or "").strip()
    interface = (model_provider.get("provider_interface") or "").strip()
    return (
        model.startswith("claude-cli/")
        or name.startswith("claude-cli/")
        or interface == "claude-cli"
    )


def _apply_claude_cli_autofill(model_provider):
    """Fill in the implicit fields of a `claude-cli/*` provider entry in place.

    The user only has to write `model: claude-cli/*` (or any `claude-cli/...`);
    this adds whatever is still missing or empty: `name` (the model, falling
    back to `claude-cli/*`), the `claude-cli` provider_interface, the default
    loopback `base_url`, and a placeholder `access_key` so downstream
    validation does not reject the entry. Values the user already set are
    never overwritten.

    Returns True iff the entry was treated as a claude-cli provider; entries
    that do not match are left untouched and yield False.
    """
    if not _is_claude_cli_provider(model_provider):
        return False

    if not model_provider.get("name"):
        model_provider["name"] = model_provider.get("model") or CLAUDE_CLI_DEFAULT_NAME
    if not model_provider.get("provider_interface"):
        model_provider["provider_interface"] = "claude-cli"
    if not model_provider.get("base_url"):
        model_provider["base_url"] = CLAUDE_CLI_DEFAULT_BASE_URL
    # Keep passthrough_auth users alone; the bridge ignores the access key
    # anyway (it uses the host's `claude auth login` keychain), so a
    # placeholder is fine for everyone else.
    if not model_provider.get("access_key") and not model_provider.get(
        "passthrough_auth"
    ):
        model_provider["access_key"] = CLAUDE_CLI_DEFAULT_ACCESS_KEY_PLACEHOLDER

    return True


# Unchanged diff context from cli/planoai/config_generator.py:
#   def get_endpoint_and_port(endpoint, protocol): ...
#
# Call site added inside validate_and_render_schema(), as the first statement
# of `for model_provider in listener.get("model_providers", []):`
#
#     # Auto-fill the implicit fields for `claude-cli/*` providers
#     # before the rest of the loop runs validation. This makes
#     # `model_providers: [{model: claude-cli/*}]` a fully-formed
#     # entry by the time we reach the wildcard checks below.
#     _apply_claude_cli_autofill(model_provider)


# ---------------------------------------------------------------------------
# cli/planoai/native_runner.py
# ---------------------------------------------------------------------------
log = getLogger(__name__)


CLAUDE_CLI_DEFAULT_LISTEN_ADDR = "127.0.0.1:14001"
# Env vars the user can set to customize the bridge. We always honor a
# pre-set CLAUDE_CLI_LISTEN_ADDR (so power users can move the listener)
# but otherwise inject the default whenever a claude-cli provider is
# detected in the rendered config.
CLAUDE_CLI_PASSTHROUGH_ENV = (
    "CLAUDE_CLI_LISTEN_ADDR",
    "CLAUDE_CLI_BIN",
    "CLAUDE_CLI_PERMISSION_MODE",
    "CLAUDE_CLI_SESSION_TTL_SECS",
    "CLAUDE_CLI_WATCHDOG_SECS",
    "CLAUDE_CLI_MAX_SESSIONS",
)


def _needs_claude_cli_runtime(plano_config_rendered_path) -> bool:
    """Return True iff the rendered config declares a claude-cli provider.

    Looks at the top-level `model_providers` list of the rendered YAML and
    reports whether any entry has `provider_interface: claude-cli`. A missing
    file, a document whose root is not a mapping, a `model_providers` value
    that is not a list, and list entries that are not mappings all count as
    "no claude-cli provider" instead of raising.
    """
    import yaml

    try:
        with open(plano_config_rendered_path, "r") as f:
            rendered = yaml.safe_load(f)
    except FileNotFoundError:
        return False

    # safe_load returns None for an empty document and may return a list or
    # scalar for unexpected content; only a mapping can carry model_providers.
    if not isinstance(rendered, dict):
        return False
    providers = rendered.get("model_providers")
    if not isinstance(providers, list):
        return False
    return any(
        isinstance(provider, dict)
        and provider.get("provider_interface") == "claude-cli"
        for provider in providers
    )


def _merge_claude_cli_env(brightstaff_env, host_env):
    """Copy CLAUDE_CLI_* settings from `host_env`, then default the listen addr.

    Keys already present in `brightstaff_env` always win. The passthrough runs
    BEFORE the default is injected: otherwise the default would occupy
    `CLAUDE_CLI_LISTEN_ADDR` and a listen address set in the host environment
    could never be picked up.
    """
    for key in CLAUDE_CLI_PASSTHROUGH_ENV:
        if key in host_env and key not in brightstaff_env:
            brightstaff_env[key] = host_env[key]
    # Still unset (or empty) after the passthrough: fall back to the default
    # so the bridge listener actually starts.
    if not brightstaff_env.get("CLAUDE_CLI_LISTEN_ADDR"):
        brightstaff_env["CLAUDE_CLI_LISTEN_ADDR"] = CLAUDE_CLI_DEFAULT_LISTEN_ADDR


def _apply_claude_cli_env(brightstaff_env, plano_config_rendered_path):
    """Enable the claude-cli bridge in the brightstaff environment if needed.

    When the rendered config contains a claude-cli provider, make sure
    `CLAUDE_CLI_LISTEN_ADDR` is set in `brightstaff_env` (mutated in place).
    Precedence, highest first: values already in `brightstaff_env`, values
    from this process's `os.environ` for the keys in
    `CLAUDE_CLI_PASSTHROUGH_ENV`, then the built-in default listen address.

    Returns True iff the bridge was enabled; otherwise `brightstaff_env` is
    left untouched and False is returned.
    """
    if not _needs_claude_cli_runtime(plano_config_rendered_path):
        return False
    _merge_claude_cli_env(brightstaff_env, os.environ)
    log.info(
        "claude-cli bridge enabled: brightstaff will listen on %s",
        brightstaff_env["CLAUDE_CLI_LISTEN_ADDR"],
    )
    return True


# Unchanged diff context from cli/planoai/native_runner.py:
#   def _find_config_dir(): ...
#
# Call site added in start_native(), right after the loop that merges `env`
# into `brightstaff_env`:
#
# Enable the claude-cli bridge if the rendered config asks for it. Done
# after `env.items()` is merged so user-set CLAUDE_CLI_* env vars take
# precedence over the auto-injected defaults.
+ _apply_claude_cli_env(brightstaff_env, plano_config_rendered_path) + brightstaff_pid = _daemon_exec( [brightstaff_path], brightstaff_env, diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py index 77b5b4803..e1ba5a74a 100644 --- a/cli/test/test_config_generator.py +++ b/cli/test/test_config_generator.py @@ -3,8 +3,11 @@ import yaml from unittest import mock from planoai.config_generator import ( - validate_and_render_schema, + CLAUDE_CLI_DEFAULT_BASE_URL, + _apply_claude_cli_autofill, + _is_claude_cli_provider, migrate_inline_routing_preferences, + validate_and_render_schema, ) @@ -738,3 +741,64 @@ def test_migration_does_not_downgrade_newer_versions(): migrate_inline_routing_preferences(config_yaml) assert config_yaml["version"] == "v0.5.0" + + +def test_claude_cli_autofill_wildcard_provider(): + provider = {"model": "claude-cli/*"} + assert _is_claude_cli_provider(provider) is True + assert _apply_claude_cli_autofill(provider) is True + assert provider["name"] == "claude-cli/*" + assert provider["provider_interface"] == "claude-cli" + assert provider["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL + assert provider["access_key"] == "claude-cli-local" + # `model` itself must not be rewritten — the wildcard expansion happens + # downstream and we want to preserve the user's intent. + assert provider["model"] == "claude-cli/*" + + +def test_claude_cli_autofill_specific_model(): + provider = {"model": "claude-cli/sonnet", "default": True} + assert _apply_claude_cli_autofill(provider) is True + assert provider["name"] == "claude-cli/sonnet" + assert provider["provider_interface"] == "claude-cli" + assert provider["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL + # Existing fields like `default` survive. 
+ assert provider["default"] is True + + +def test_claude_cli_autofill_does_not_override_user_fields(): + provider = { + "model": "claude-cli/*", + "name": "custom-name", + "base_url": "http://192.0.2.10:9000", + "access_key": "do-not-touch", + } + assert _apply_claude_cli_autofill(provider) is True + assert provider["name"] == "custom-name" + assert provider["base_url"] == "http://192.0.2.10:9000" + assert provider["access_key"] == "do-not-touch" + # provider_interface still gets injected because it was missing. + assert provider["provider_interface"] == "claude-cli" + + +def test_claude_cli_autofill_skips_non_matching_providers(): + provider = {"model": "openai/gpt-4o"} + assert _is_claude_cli_provider(provider) is False + assert _apply_claude_cli_autofill(provider) is False + assert "provider_interface" not in provider + + +def test_claude_cli_autofill_passthrough_auth_skips_access_key(): + provider = {"model": "claude-cli/*", "passthrough_auth": True} + assert _apply_claude_cli_autofill(provider) is True + # Honor passthrough_auth: do not inject a placeholder access_key. 
+ assert "access_key" not in provider + assert provider["passthrough_auth"] is True + + +def test_claude_cli_autofill_detects_via_provider_interface_only(): + provider = {"model": "sonnet", "provider_interface": "claude-cli"} + assert _is_claude_cli_provider(provider) is True + assert _apply_claude_cli_autofill(provider) is True + assert provider["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL + assert provider["name"] == "sonnet" diff --git a/cli/test/test_native_runner_claude_cli.py b/cli/test/test_native_runner_claude_cli.py new file mode 100644 index 000000000..a7bb495e1 --- /dev/null +++ b/cli/test/test_native_runner_claude_cli.py @@ -0,0 +1,112 @@ +"""Unit tests for the claude-cli env wiring in native_runner.py.""" + +import os +import textwrap + +from planoai.native_runner import ( + CLAUDE_CLI_DEFAULT_LISTEN_ADDR, + _apply_claude_cli_env, + _needs_claude_cli_runtime, +) + + +def _write(path, body): + path.write_text(textwrap.dedent(body).lstrip()) + return str(path) + + +def test_needs_claude_cli_runtime_detects_provider(tmp_path): + rendered = _write( + tmp_path / "rendered.yaml", + """ + version: v0.4.0 + listeners: [] + model_providers: + - name: claude-cli/* + model: '*' + provider_interface: claude-cli + base_url: http://127.0.0.1:14001 + """, + ) + assert _needs_claude_cli_runtime(rendered) is True + + +def test_needs_claude_cli_runtime_skips_other_providers(tmp_path): + rendered = _write( + tmp_path / "rendered.yaml", + """ + version: v0.4.0 + model_providers: + - name: openai/gpt-4o + model: gpt-4o + provider_interface: openai + """, + ) + assert _needs_claude_cli_runtime(rendered) is False + + +def test_needs_claude_cli_runtime_handles_missing_file(tmp_path): + assert _needs_claude_cli_runtime(str(tmp_path / "does-not-exist.yaml")) is False + + +def test_apply_claude_cli_env_injects_default_addr(tmp_path, monkeypatch): + rendered = _write( + tmp_path / "rendered.yaml", + """ + model_providers: + - provider_interface: claude-cli + model: '*' + """, + ) + 
monkeypatch.delenv("CLAUDE_CLI_LISTEN_ADDR", raising=False) + monkeypatch.delenv("CLAUDE_CLI_BIN", raising=False) + env = {} + assert _apply_claude_cli_env(env, rendered) is True + assert env["CLAUDE_CLI_LISTEN_ADDR"] == CLAUDE_CLI_DEFAULT_LISTEN_ADDR + + +def test_apply_claude_cli_env_honors_user_override(tmp_path, monkeypatch): + rendered = _write( + tmp_path / "rendered.yaml", + """ + model_providers: + - provider_interface: claude-cli + model: '*' + """, + ) + monkeypatch.delenv("CLAUDE_CLI_LISTEN_ADDR", raising=False) + env = {"CLAUDE_CLI_LISTEN_ADDR": "127.0.0.1:25000"} + assert _apply_claude_cli_env(env, rendered) is True + assert env["CLAUDE_CLI_LISTEN_ADDR"] == "127.0.0.1:25000" + + +def test_apply_claude_cli_env_passes_through_user_env(tmp_path, monkeypatch): + rendered = _write( + tmp_path / "rendered.yaml", + """ + model_providers: + - provider_interface: claude-cli + model: '*' + """, + ) + monkeypatch.delenv("CLAUDE_CLI_LISTEN_ADDR", raising=False) + monkeypatch.setenv("CLAUDE_CLI_BIN", "/usr/local/bin/claude-test") + monkeypatch.setenv("CLAUDE_CLI_PERMISSION_MODE", "default") + env = {} + assert _apply_claude_cli_env(env, rendered) is True + assert env["CLAUDE_CLI_BIN"] == "/usr/local/bin/claude-test" + assert env["CLAUDE_CLI_PERMISSION_MODE"] == "default" + + +def test_apply_claude_cli_env_noop_for_other_configs(tmp_path): + rendered = _write( + tmp_path / "rendered.yaml", + """ + model_providers: + - provider_interface: openai + model: gpt-4o + """, + ) + env = {} + assert _apply_claude_cli_env(env, rendered) is False + assert "CLAUDE_CLI_LISTEN_ADDR" not in env diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 9560b4376..10ad86cef 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -184,6 +184,7 @@ properties: enum: - plano - claude + - claude-cli - deepseek - groq - mistral @@ -242,6 +243,7 @@ properties: enum: - plano - claude + - claude-cli - deepseek - groq - mistral diff 
--git a/config/supervisord.conf b/config/supervisord.conf index a28691360..f2095c8e0 100644 --- a/config/supervisord.conf +++ b/config/supervisord.conf @@ -18,8 +18,16 @@ stdout_logfile_maxbytes=0 stderr_logfile_maxbytes=0 [program:brightstaff] +# CLAUDE_CLI_LISTEN_ADDR is set automatically when the rendered config has at +# least one provider with `provider_interface: claude-cli` (the Python config +# generator auto-fills that field for any `model: claude-cli/*` entry). The +# bridge listener stays off otherwise — matches native_runner.py behavior. command=sh -c "\ while [ ! -f /tmp/config_ready ]; do echo '[brightstaff] Waiting for config generation...'; sleep 0.5; done && \ + if grep -q 'provider_interface: claude-cli' /app/plano_config_rendered.env_sub.yaml 2>/dev/null; then \ + export CLAUDE_CLI_LISTEN_ADDR=${CLAUDE_CLI_LISTEN_ADDR:-127.0.0.1:14001}; \ + echo '[brightstaff] claude-cli bridge enabled on '$CLAUDE_CLI_LISTEN_ADDR; \ + fi; \ RUST_LOG=${LOG_LEVEL:-info} \ PLANO_CONFIG_PATH_RENDERED=/app/plano_config_rendered.env_sub.yaml \ /app/brightstaff 2>&1 | \ diff --git a/crates/brightstaff/src/handlers/claude_cli/mod.rs b/crates/brightstaff/src/handlers/claude_cli/mod.rs new file mode 100644 index 000000000..89fff8ee5 --- /dev/null +++ b/crates/brightstaff/src/handlers/claude_cli/mod.rs @@ -0,0 +1,22 @@ +//! Bridge that exposes the local `claude` CLI as an Anthropic Messages API +//! endpoint on a localhost port, allowing it to be used as just another +//! `model_provider` in Plano. +//! +//! Wire-up: +//! - `process` — spawns and manages the `claude -p --output-format stream-json +//! --input-format stream-json` subprocess. +//! - `session` — keys long-lived processes by session id (header or hash) and +//! enforces idle TTL / cap. +//! - `server` — hyper listener that speaks `POST /v1/messages` and bridges +//! between Anthropic SSE and the CLI's NDJSON. +//! +//! Translation between the two wire formats lives in +//! 
`hermesllm::apis::claude_cli`; this module only owns runtime concerns. + +pub mod process; +pub mod server; +pub mod session; + +pub use process::{ClaudeCliConfig, ClaudeProcess, ProcessError}; +pub use server::run_listener; +pub use session::{SessionManager, SessionManagerConfig, SESSION_HEADER}; diff --git a/crates/brightstaff/src/handlers/claude_cli/process.rs b/crates/brightstaff/src/handlers/claude_cli/process.rs new file mode 100644 index 000000000..6a19943cc --- /dev/null +++ b/crates/brightstaff/src/handlers/claude_cli/process.rs @@ -0,0 +1,330 @@ +//! Manages the lifetime of one `claude -p` child process for a single +//! conversation session. Spawning, env scrubbing, NDJSON line reading and the +//! per-line watchdog all live here. Translation between Anthropic Messages +//! and stream-json lives in `hermesllm::apis::claude_cli`. + +use std::process::Stdio; +use std::sync::Arc; +use std::time::Duration; + +use hermesllm::apis::claude_cli::{parse_ndjson_line, ClaudeCliEvent, ClaudeCliInputEvent}; +use thiserror::Error; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::process::{Child, ChildStdin, Command}; +use tokio::sync::{mpsc, Mutex, OwnedMutexGuard}; +use tokio::time::{self, Instant}; +use tracing::{debug, info, warn}; + +/// Tunables for one `ClaudeProcess`. Defaults match the OpenClaw reference +/// configuration: `bypassPermissions`, ~120 s watchdog window, ~10 min idle TTL. +#[derive(Debug, Clone)] +pub struct ClaudeCliConfig { + /// Path or name of the `claude` binary (looked up via `$PATH`). + pub binary: String, + /// Value passed to `--permission-mode`. The CLI accepts `default`, + /// `acceptEdits`, `plan`, `auto`, `dontAsk`, `bypassPermissions`. + pub permission_mode: String, + /// Idle session TTL — after this many seconds without a request the + /// session manager kills the child. + pub session_ttl: Duration, + /// Per-line watchdog: if no NDJSON line arrives for this long during a + /// turn, kill the child. 
Reset on every line (not every byte). + pub watchdog: Duration, +} + +impl Default for ClaudeCliConfig { + fn default() -> Self { + Self { + binary: "claude".to_string(), + permission_mode: "bypassPermissions".to_string(), + session_ttl: Duration::from_secs(600), + watchdog: Duration::from_secs(120), + } + } +} + +/// Errors produced while interacting with the child process. +#[derive(Debug, Error)] +pub enum ProcessError { + #[error("failed to spawn `{binary}`: {source}")] + Spawn { + binary: String, + #[source] + source: std::io::Error, + }, + #[error("failed to write to claude stdin: {0}")] + StdinWrite(#[source] std::io::Error), + #[error("claude process exited unexpectedly")] + ExitedEarly, + #[error("claude watchdog fired after {0:?} of silence")] + WatchdogTimeout(Duration), + #[error("failed to serialize stdin payload: {0}")] + Serialize(#[from] serde_json::Error), + #[error("turn already in progress for this session")] + TurnInProgress, +} + +/// Strip down to the model alias / id the CLI's `--model` flag accepts. +/// Models registered via the wildcard `claude-cli/*` arrive prefixed with +/// `claude-cli/` (or just bare, e.g. `sonnet`); both forms are normalized +/// here. +pub fn normalize_model_arg(model: &str) -> &str { + model.strip_prefix("claude-cli/").unwrap_or(model) +} + +/// Environment variables that must be removed before exec'ing `claude` so the +/// child uses its own login keychain rather than picking up server-side +/// credentials. The list mirrors the OpenClaw scrub list. +const SCRUB_ENV_PREFIXES: &[&str] = &["ANTHROPIC_", "CLAUDE_CODE_", "OTEL_"]; + +fn scrubbed_env_for_spawn() -> Vec<(String, String)> { + std::env::vars() + .filter(|(k, _)| !SCRUB_ENV_PREFIXES.iter().any(|p| k.starts_with(p))) + .collect() +} + +/// One running `claude -p` subprocess plus the channels we use to talk to it. +/// Each `ClaudeProcess` is owned by exactly one session. 
+pub struct ClaudeProcess { + child: Mutex>, + stdin: Mutex>, + /// The receiver of `ClaudeCliEvent`s parsed from the child's stdout. + /// Wrapped in `Arc` so a `TurnStream` can hold an owned guard for + /// the duration of one turn (which serializes turns within a session). + event_rx: Arc>>, + config: ClaudeCliConfig, + /// Last time a request was served on this session — used by the session + /// manager to enforce the idle TTL. + last_used: Mutex, + pub session_id: String, +} + +impl ClaudeProcess { + /// Spawn a new child for `session_id`. The first turn for a new session + /// should be the user's Anthropic request body — see + /// [`ClaudeProcess::send_user_turn`] for that. + pub async fn spawn( + session_id: String, + model: &str, + system_prompt: Option<&str>, + cwd: Option<&std::path::Path>, + config: ClaudeCliConfig, + ) -> Result, ProcessError> { + let mut cmd = Command::new(&config.binary); + cmd.arg("-p") + .arg("--output-format") + .arg("stream-json") + .arg("--input-format") + .arg("stream-json") + .arg("--verbose") + .arg("--include-partial-messages") + .arg("--permission-mode") + .arg(&config.permission_mode) + .arg("--model") + .arg(normalize_model_arg(model)) + .arg("--session-id") + .arg(&session_id) + .arg("--no-session-persistence"); + + if let Some(prompt) = system_prompt { + // Append (don't replace) so Claude Code's built-in system prompt + // — which carries tool definitions — is preserved. 
+ cmd.arg("--append-system-prompt").arg(prompt); + } + if let Some(dir) = cwd { + cmd.current_dir(dir); + } + cmd.env_clear(); + for (k, v) in scrubbed_env_for_spawn() { + cmd.env(k, v); + } + cmd.stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .kill_on_drop(true); + + let mut child = cmd.spawn().map_err(|e| ProcessError::Spawn { + binary: config.binary.clone(), + source: e, + })?; + + let stdin = child.stdin.take().ok_or(ProcessError::ExitedEarly)?; + let stdout = child.stdout.take().ok_or(ProcessError::ExitedEarly)?; + let stderr = child.stderr.take().ok_or(ProcessError::ExitedEarly)?; + + // Bounded channel — backpressure if the consumer is slow, but large + // enough that bursts of small text deltas do not block stdout drain. + let (tx, rx) = mpsc::channel::(256); + + let session_for_log = session_id.clone(); + tokio::spawn(async move { + let mut reader = BufReader::new(stdout).lines(); + loop { + match reader.next_line().await { + Ok(Some(line)) => { + if let Some(parsed) = parse_ndjson_line(&line) { + match parsed { + Ok(ev) => { + if tx.send(ev).await.is_err() { + break; + } + } + Err(err) => { + warn!( + session = %session_for_log, + error = %err, + line = %line, + "failed to parse claude NDJSON line" + ); + } + } + } + } + Ok(None) => { + debug!(session = %session_for_log, "claude stdout closed"); + break; + } + Err(err) => { + warn!( + session = %session_for_log, + error = %err, + "claude stdout read error" + ); + break; + } + } + } + }); + + let session_for_stderr = session_id.clone(); + tokio::spawn(async move { + let mut reader = BufReader::new(stderr).lines(); + while let Ok(Some(line)) = reader.next_line().await { + if !line.trim().is_empty() { + warn!(session = %session_for_stderr, line = %line, "claude stderr"); + } + } + }); + + info!( + session = %session_id, + model = %normalize_model_arg(model), + "spawned claude-cli" + ); + + Ok(Arc::new(Self { + child: Mutex::new(Some(child)), + stdin: Mutex::new(Some(stdin)), + 
event_rx: Arc::new(Mutex::new(rx)), + config, + last_used: Mutex::new(Instant::now()), + session_id, + })) + } + + /// Write the user-turn JSONL events to the child's stdin and return a + /// stream that yields parsed CLI events for this turn until the terminal + /// `result` event (or watchdog) ends it. + /// + /// Holds an exclusive lock on the event receiver for the duration of the + /// turn, so concurrent calls return [`ProcessError::TurnInProgress`]. + pub async fn send_user_turn( + &self, + events: &[ClaudeCliInputEvent], + ) -> Result { + *self.last_used.lock().await = Instant::now(); + + // Claim the event receiver for the lifetime of this turn. + let rx_guard = Arc::clone(&self.event_rx) + .try_lock_owned() + .map_err(|_| ProcessError::TurnInProgress)?; + + let mut stdin_guard = self.stdin.lock().await; + let stdin = stdin_guard.as_mut().ok_or(ProcessError::ExitedEarly)?; + for ev in events { + let mut bytes = serde_json::to_vec(ev)?; + bytes.push(b'\n'); + stdin + .write_all(&bytes) + .await + .map_err(ProcessError::StdinWrite)?; + } + stdin.flush().await.map_err(ProcessError::StdinWrite)?; + + Ok(TurnStream { + rx: rx_guard, + watchdog: self.config.watchdog, + done: false, + }) + } + + /// Most-recent activity timestamp; used by the session manager's reaper. + pub async fn last_used(&self) -> Instant { + *self.last_used.lock().await + } + + /// Forcefully terminate the child. Safe to call multiple times. + pub async fn shutdown(&self) { + if let Some(mut child) = self.child.lock().await.take() { + let _ = child.start_kill(); + let _ = child.wait().await; + } + // Dropping stdin signals the child if it survived `start_kill`. + let _ = self.stdin.lock().await.take(); + } +} + +/// One-shot stream of CLI events for a single user turn. Yields events until +/// the terminal `result` event is observed (or the watchdog fires). Drops the +/// owned receiver lock when finished, allowing the next turn to start. 
+pub struct TurnStream { + rx: OwnedMutexGuard>, + watchdog: Duration, + done: bool, +} + +impl TurnStream { + /// Pull the next CLI event from the child, applying the per-line + /// watchdog. Returns `Ok(None)` when the turn's terminal `result` event + /// has been delivered. + pub async fn next(&mut self) -> Result, ProcessError> { + if self.done { + return Ok(None); + } + match time::timeout(self.watchdog, self.rx.recv()).await { + Ok(Some(ev)) => { + if matches!(ev, ClaudeCliEvent::Result { .. }) { + self.done = true; + } + Ok(Some(ev)) + } + Ok(None) => { + self.done = true; + Err(ProcessError::ExitedEarly) + } + Err(_) => { + self.done = true; + Err(ProcessError::WatchdogTimeout(self.watchdog)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_model_arg_strips_prefix() { + assert_eq!(normalize_model_arg("claude-cli/sonnet"), "sonnet"); + assert_eq!( + normalize_model_arg("claude-cli/claude-opus-4-7"), + "claude-opus-4-7" + ); + assert_eq!(normalize_model_arg("sonnet"), "sonnet"); + } + + // Note: cannot mutate process env in unit tests safely since tests run + // in parallel; spawn integration tests cover env behavior end-to-end via + // the fake_claude.sh fixture. +} diff --git a/crates/brightstaff/src/handlers/claude_cli/server.rs b/crates/brightstaff/src/handlers/claude_cli/server.rs new file mode 100644 index 000000000..68f3dc579 --- /dev/null +++ b/crates/brightstaff/src/handlers/claude_cli/server.rs @@ -0,0 +1,335 @@ +//! HTTP server fronting the claude-cli bridge. Speaks Anthropic Messages API +//! (`POST /v1/messages`) on a localhost port; everything inside this module +//! delegates to `hermesllm::apis::claude_cli` for translation and to +//! `super::session::SessionManager` for subprocess lifecycle. 
+ +use std::convert::Infallible; +use std::net::SocketAddr; +use std::sync::Arc; + +use bytes::Bytes; +use futures::stream; +use hermesllm::apis::anthropic::MessagesRequest; +use hermesllm::apis::claude_cli::{ + cli_error_to_anthropic_error_body, cli_event_to_messages_stream_event, + collect_to_messages_response, extract_system_prompt, messages_request_to_stdin_payload, + synthetic_message_start, ClaudeCliEvent, +}; +use http_body_util::combinators::BoxBody; +use http_body_util::{BodyExt, Full, StreamBody}; +use hyper::body::{Frame, Incoming}; +use hyper::header::{self, HeaderValue}; +use hyper::server::conn::http1; +use hyper::service::service_fn; +use hyper::{Method, Request, Response, StatusCode}; +use hyper_util::rt::TokioIo; +use tokio::net::TcpListener; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; +use tracing::{debug, error, info, warn}; + +use super::session::{SessionManager, SESSION_HEADER}; + +/// Spawn the claude-cli bridge listener. The returned `JoinHandle` resolves +/// when the listener loop exits (either via the provided shutdown signal or a +/// fatal accept error). On shutdown the manager drains all active sessions. +pub async fn run_listener( + addr: SocketAddr, + manager: Arc, + shutdown: F, +) -> Result<(), Box> +where + F: std::future::Future + Send + 'static, +{ + let listener = TcpListener::bind(addr).await?; + info!(%addr, "claude-cli bridge listening"); + + let manager_for_shutdown = Arc::clone(&manager); + tokio::pin!(shutdown); + loop { + tokio::select! 
{ + accept = listener.accept() => { + let (stream, peer) = match accept { + Ok(s) => s, + Err(err) => { + warn!(error = ?err, "claude-cli accept error"); + continue; + } + }; + debug!(peer = ?peer, "claude-cli accepted connection"); + let manager = Arc::clone(&manager); + let io = TokioIo::new(stream); + tokio::task::spawn(async move { + let svc = service_fn(move |req| { + let manager = Arc::clone(&manager); + async move { handle(req, manager).await } + }); + if let Err(err) = http1::Builder::new().serve_connection(io, svc).await { + warn!(error = ?err, "claude-cli connection error"); + } + }); + } + _ = &mut shutdown => { + info!("claude-cli bridge shutting down"); + manager_for_shutdown.shutdown_all().await; + return Ok(()); + } + } + } +} + +async fn handle( + req: Request, + manager: Arc, +) -> Result>, hyper::Error> { + let path = req.uri().path(); + let method = req.method(); + if method == Method::GET && path == "/healthz" { + return Ok(text_response(StatusCode::OK, "ok")); + } + if method != Method::POST || path != "/v1/messages" { + return Ok(text_response(StatusCode::NOT_FOUND, "not found")); + } + + // Pull out the optional session header up front so we can drop the + // request after consuming the body. 
+ let session_header = req + .headers() + .get(SESSION_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|s| s.to_string()); + + let body_bytes = match req.collect().await { + Ok(c) => c.to_bytes(), + Err(err) => { + warn!(error = %err, "failed to read claude-cli request body"); + return Ok(json_error(StatusCode::BAD_REQUEST, "failed to read body")); + } + }; + + let parsed: MessagesRequest = match serde_json::from_slice(&body_bytes) { + Ok(p) => p, + Err(err) => { + warn!(error = %err, "failed to parse Anthropic MessagesRequest"); + return Ok(json_error( + StatusCode::BAD_REQUEST, + &format!("invalid Anthropic MessagesRequest: {err}"), + )); + } + }; + + let session_id = SessionManager::resolve_session_id(session_header.as_deref(), &parsed); + let system_prompt = extract_system_prompt(&parsed); + + let process = match manager + .get_or_spawn(&session_id, &parsed.model, system_prompt.as_deref(), None) + .await + { + Ok(p) => p, + Err(err) => { + error!(session = %session_id, error = %err, "failed to spawn claude-cli"); + return Ok(json_error( + StatusCode::BAD_GATEWAY, + &format!("failed to spawn claude-cli: {err}"), + )); + } + }; + + let stdin_payload = match messages_request_to_stdin_payload(&parsed, Some(&session_id)) { + Ok(p) => p, + Err(err) => { + warn!(error = %err, "failed to build claude-cli stdin payload"); + return Ok(json_error( + StatusCode::BAD_REQUEST, + &format!("failed to build claude-cli stdin payload: {err}"), + )); + } + }; + + let streaming = parsed.stream.unwrap_or(false); + let model = parsed.model.clone(); + + let mut turn = match process.send_user_turn(&stdin_payload).await { + Ok(t) => t, + Err(err) => { + error!(session = %session_id, error = %err, "failed to send user turn"); + return Ok(json_error( + StatusCode::BAD_GATEWAY, + &format!("failed to send user turn: {err}"), + )); + } + }; + + if streaming { + Ok(stream_response(turn, model, session_id)) + } else { + // Drain the entire turn before answering. 
+ let mut events: Vec = Vec::new(); + loop { + match turn.next().await { + Ok(Some(ev)) => events.push(ev), + Ok(None) => break, + Err(err) => { + warn!(session = %session_id, error = %err, "claude-cli turn failed"); + let body = cli_error_to_anthropic_error_body(&err.to_string()); + return Ok(json_response(StatusCode::BAD_GATEWAY, &body)); + } + } + } + match collect_to_messages_response(&model, events) { + Ok(resp) => Ok(json_response(StatusCode::OK, &resp)), + Err(err) => { + let body = cli_error_to_anthropic_error_body(&err.to_string()); + Ok(json_response(StatusCode::BAD_GATEWAY, &body)) + } + } + } +} + +fn stream_response( + mut turn: super::process::TurnStream, + model: String, + session_id: String, +) -> Response> { + let (tx, rx) = mpsc::channel::, Infallible>>(64); + + tokio::spawn(async move { + // Some short turns skip MessageStart; emit a synthetic one so the + // client always sees a complete stream. + let mut emitted_message_start = false; + + loop { + let ev = match turn.next().await { + Ok(Some(ev)) => ev, + Ok(None) => break, + Err(err) => { + warn!(session = %session_id, error = %err, "claude-cli streaming turn failed"); + let body = cli_error_to_anthropic_error_body(&err.to_string()); + let frame = + Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap())); + let _ = tx.send(Ok(frame)).await; + break; + } + }; + + if !emitted_message_start { + if let ClaudeCliEvent::StreamEvent { + event: hermesllm::apis::anthropic::MessagesStreamEvent::MessageStart { .. }, + } = &ev + { + emitted_message_start = true; + } else if matches!(&ev, ClaudeCliEvent::Result { .. }) { + // No actual content was streamed; synthesize a + // MessageStart so the SSE stream is well-formed. 
+ let synthetic = synthetic_message_start(&model, Some(&session_id)); + if let Some(frame) = sse_frame_for_event(&synthetic) { + let _ = tx.send(Ok(frame)).await; + } + emitted_message_start = true; + } + } + + if let Some(translated) = cli_event_to_messages_stream_event(&ev) { + if let Some(frame) = sse_frame_for_event(&translated) { + if tx.send(Ok(frame)).await.is_err() { + break; + } + } + } + + if let ClaudeCliEvent::Result { + is_error, result, .. + } = &ev + { + if *is_error { + let msg = result + .clone() + .unwrap_or_else(|| "claude-cli returned an error".to_string()); + let body = cli_error_to_anthropic_error_body(&msg); + let frame = + Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap())); + let _ = tx.send(Ok(frame)).await; + } + break; + } + } + }); + + let body = StreamBody::new(ReceiverStream::new(rx)); + let mut resp = Response::new(body.boxed()); + *resp.status_mut() = StatusCode::OK; + resp.headers_mut().insert( + header::CONTENT_TYPE, + HeaderValue::from_static("text/event-stream"), + ); + resp.headers_mut() + .insert(header::CACHE_CONTROL, HeaderValue::from_static("no-cache")); + resp.headers_mut() + .insert("X-Accel-Buffering", HeaderValue::from_static("no")); + resp +} + +fn sse_frame_for_event( + event: &hermesllm::apis::anthropic::MessagesStreamEvent, +) -> Option> { + use hermesllm::apis::anthropic::MessagesStreamEvent; + let event_name = match event { + MessagesStreamEvent::MessageStart { .. } => "message_start", + MessagesStreamEvent::ContentBlockStart { .. } => "content_block_start", + MessagesStreamEvent::ContentBlockDelta { .. } => "content_block_delta", + MessagesStreamEvent::ContentBlockStop { .. } => "content_block_stop", + MessagesStreamEvent::MessageDelta { .. 
} => "message_delta", + MessagesStreamEvent::MessageStop => "message_stop", + MessagesStreamEvent::Ping => "ping", + }; + let data = serde_json::to_string(event).ok()?; + Some(Frame::data(format_sse(event_name, &data))) +} + +fn format_sse(event: &str, data: &str) -> Bytes { + Bytes::from(format!("event: {event}\ndata: {data}\n\n")) +} + +fn json_response( + status: StatusCode, + body: &T, +) -> Response> { + let bytes = serde_json::to_vec(body).unwrap_or_else(|_| b"{}".to_vec()); + let body = Full::new(Bytes::from(bytes)) + .map_err(|e| match e {}) + .boxed(); + let mut resp = Response::new(body); + *resp.status_mut() = status; + resp.headers_mut().insert( + header::CONTENT_TYPE, + HeaderValue::from_static("application/json"), + ); + resp +} + +fn json_error(status: StatusCode, message: &str) -> Response> { + let body = cli_error_to_anthropic_error_body(message); + json_response(status, &body) +} + +fn text_response( + status: StatusCode, + message: &'static str, +) -> Response> { + let body = Full::new(Bytes::from_static(message.as_bytes())) + .map_err(|e| match e {}) + .boxed(); + let mut resp = Response::new(body); + *resp.status_mut() = status; + resp.headers_mut() + .insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); + resp +} + +// Ensure a no-op import so that `stream` (re-exported from futures) is +// considered used in case future expansion needs it. Avoids accidental +// deletion when running `cargo fix`. +#[allow(dead_code)] +fn _touch_stream_module() { + let _: stream::Empty = stream::empty(); +} diff --git a/crates/brightstaff/src/handlers/claude_cli/session.rs b/crates/brightstaff/src/handlers/claude_cli/session.rs new file mode 100644 index 000000000..cd664cab1 --- /dev/null +++ b/crates/brightstaff/src/handlers/claude_cli/session.rs @@ -0,0 +1,341 @@ +//! Session manager for the claude-cli bridge. Maps a stable session id (taken +//! from a client-provided header or hashed from the conversation prefix) to a +//! 
long-lived `ClaudeProcess`. Enforces an idle TTL and a hard cap on the +//! number of concurrent sessions. + +use std::collections::{hash_map::DefaultHasher, HashMap}; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; +use std::time::Duration; + +use hermesllm::apis::anthropic::{ + MessagesContentBlock, MessagesMessageContent, MessagesRequest, MessagesRole, + MessagesSystemPrompt, +}; +use tokio::sync::Mutex; +use tokio::time::Instant; +use tracing::{debug, info}; + +use super::process::{ClaudeCliConfig, ClaudeProcess, ProcessError}; + +/// Optional client header that pins a request to a specific session id. +pub const SESSION_HEADER: &str = "x-arch-claude-cli-session"; + +/// Default cap. The bridge is local and per-developer; this is a guard +/// against runaway memory if a client bug churns through unique session ids. +pub const DEFAULT_MAX_SESSIONS: usize = 64; + +/// Tunables for the session manager. +#[derive(Debug, Clone)] +pub struct SessionManagerConfig { + pub max_sessions: usize, + pub process: ClaudeCliConfig, +} + +impl Default for SessionManagerConfig { + fn default() -> Self { + Self { + max_sessions: DEFAULT_MAX_SESSIONS, + process: ClaudeCliConfig::default(), + } + } +} + +/// Holds active `ClaudeProcess` handles keyed by session id. +pub struct SessionManager { + inner: Mutex>>, + config: SessionManagerConfig, +} + +impl SessionManager { + pub fn new(config: SessionManagerConfig) -> Arc { + Arc::new(Self { + inner: Mutex::new(HashMap::new()), + config, + }) + } + + /// Pick (or fabricate) the session id for a given request. + /// + /// Strategy (in order): + /// 1. Honor the `x-arch-claude-cli-session` header if it's a non-empty + /// valid UUID-shaped string. + /// 2. Otherwise hash `(model, system_prompt_text, first_user_message_text)` + /// and produce a deterministic UUID-shaped id so retries of the same + /// conversation reuse the same process. 
+ pub fn resolve_session_id(client_header: Option<&str>, req: &MessagesRequest) -> String { + if let Some(raw) = client_header { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + // Accept any opaque token; the CLI requires UUID format, so + // we hash unknown shapes into one. + if uuid::Uuid::parse_str(trimmed).is_ok() { + return trimmed.to_string(); + } + return uuid_from_seed(trimmed); + } + } + let mut hasher = DefaultHasher::new(); + req.model.hash(&mut hasher); + if let Some(system) = &req.system { + system_text(system).hash(&mut hasher); + } + if let Some(first) = first_user_message_text(req) { + first.hash(&mut hasher); + } + uuid_from_seed(&hasher.finish().to_string()) + } + + /// Get the existing session's process or spawn a new one. + pub async fn get_or_spawn( + &self, + session_id: &str, + model: &str, + system_prompt: Option<&str>, + cwd: Option<&std::path::Path>, + ) -> Result, ProcessError> { + // Reap idle sessions on the read path so we don't need a separate + // background task for the common one-developer-one-laptop deployment. + self.evict_idle().await; + + { + let map = self.inner.lock().await; + if let Some(existing) = map.get(session_id) { + debug!(session = %session_id, "reusing claude-cli session"); + return Ok(Arc::clone(existing)); + } + } + + let mut map = self.inner.lock().await; + if let Some(existing) = map.get(session_id) { + return Ok(Arc::clone(existing)); + } + + if map.len() >= self.config.max_sessions { + // Evict the least-recently-used session to keep the cap honest. 
+ if let Some(victim_key) = lru_session_id(&map).await { + if let Some(victim) = map.remove(&victim_key) { + info!(session = %victim_key, "evicting LRU claude-cli session to make room"); + drop(map); + victim.shutdown().await; + map = self.inner.lock().await; + } + } + } + + let process = ClaudeProcess::spawn( + session_id.to_string(), + model, + system_prompt, + cwd, + self.config.process.clone(), + ) + .await?; + map.insert(session_id.to_string(), Arc::clone(&process)); + Ok(process) + } + + /// Drop and kill all sessions. Called on graceful shutdown. + pub async fn shutdown_all(&self) { + let mut map = self.inner.lock().await; + let drained: Vec<_> = map.drain().collect(); + drop(map); + info!(count = drained.len(), "draining claude-cli sessions"); + for (_, proc) in drained { + proc.shutdown().await; + } + } + + async fn evict_idle(&self) { + let ttl = self.config.process.session_ttl; + if ttl.is_zero() { + return; + } + let now = Instant::now(); + let mut to_kill: Vec<(String, Arc)> = Vec::new(); + { + let map = self.inner.lock().await; + for (k, v) in map.iter() { + if now.duration_since(v.last_used().await) > ttl { + to_kill.push((k.clone(), Arc::clone(v))); + } + } + } + if to_kill.is_empty() { + return; + } + let mut map = self.inner.lock().await; + for (k, _) in &to_kill { + map.remove(k); + } + drop(map); + for (k, proc) in to_kill { + info!(session = %k, "evicting idle claude-cli session"); + proc.shutdown().await; + } + } +} + +async fn lru_session_id(map: &HashMap>) -> Option { + let mut oldest: Option<(String, Instant)> = None; + for (k, v) in map.iter() { + let used = v.last_used().await; + match &oldest { + Some((_, t)) if *t < used => {} + _ => oldest = Some((k.clone(), used)), + } + } + oldest.map(|(k, _)| k) +} + +fn first_user_message_text(req: &MessagesRequest) -> Option { + for msg in &req.messages { + if msg.role != MessagesRole::User { + continue; + } + return Some(match &msg.content { + MessagesMessageContent::Single(s) => s.clone(), + 
MessagesMessageContent::Blocks(blocks) => blocks + .iter() + .filter_map(|b| match b { + MessagesContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n"), + }); + } + None +} + +fn system_text(system: &MessagesSystemPrompt) -> String { + match system { + MessagesSystemPrompt::Single(s) => s.clone(), + MessagesSystemPrompt::Blocks(blocks) => blocks + .iter() + .filter_map(|b| match b { + MessagesContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n"), + } +} + +/// Deterministic v5-style UUID derived from an arbitrary seed string. The +/// `claude` CLI requires `--session-id` to be a valid UUID; we use the DNS +/// namespace constant as a stable salt so the same conversation always maps +/// to the same id without us pulling in the v5 feature of the `uuid` crate. +fn uuid_from_seed(seed: &str) -> String { + let mut hasher = DefaultHasher::new(); + seed.hash(&mut hasher); + let h1 = hasher.finish(); + let mut hasher2 = DefaultHasher::new(); + h1.hash(&mut hasher2); + seed.hash(&mut hasher2); + let h2 = hasher2.finish(); + let bytes = [ + (h1 >> 56) as u8, + (h1 >> 48) as u8, + (h1 >> 40) as u8, + (h1 >> 32) as u8, + (h1 >> 24) as u8, + (h1 >> 16) as u8, + (h1 >> 8) as u8, + h1 as u8, + (h2 >> 56) as u8, + (h2 >> 48) as u8, + (h2 >> 40) as u8, + (h2 >> 32) as u8, + (h2 >> 24) as u8, + (h2 >> 16) as u8, + (h2 >> 8) as u8, + h2 as u8, + ]; + uuid::Builder::from_random_bytes(bytes) + .into_uuid() + .to_string() +} + +/// `Duration::is_zero` shim — `Duration` exposes `is_zero` only on stable +/// 1.53+, but our MSRV already covers that. Re-exporting keeps call sites +/// terse if we ever need to swap implementations. 
#[allow(dead_code)]
fn is_zero(d: Duration) -> bool {
    // True only for the zero-length duration (no seconds, no nanoseconds).
    d == Duration::ZERO
}
SessionManager::resolve_session_id(None, &r3) + ); + assert_ne!( + SessionManager::resolve_session_id(None, &r1), + SessionManager::resolve_session_id(None, &r4) + ); + } +} diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs index 4e8512640..c4aaab3af 100644 --- a/crates/brightstaff/src/handlers/mod.rs +++ b/crates/brightstaff/src/handlers/mod.rs @@ -1,4 +1,5 @@ pub mod agents; +pub mod claude_cli; pub mod debug; pub mod function_calling; pub mod llm; diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index b1e17e42b..1fb5f9735 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -4,6 +4,9 @@ static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; use brightstaff::app_state::AppState; use brightstaff::handlers::agents::orchestrator::agent_chat; +use brightstaff::handlers::claude_cli::{ + self, ClaudeCliConfig, SessionManager, SessionManagerConfig, +}; use brightstaff::handlers::debug; use brightstaff::handlers::empty; use brightstaff::handlers::function_calling::function_calling_chat_handler; @@ -37,6 +40,7 @@ use opentelemetry::trace::FutureExt; use opentelemetry_http::HeaderExtractor; use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; use std::{env, fs}; use tokio::net::TcpListener; use tokio::sync::RwLock; @@ -575,6 +579,57 @@ async fn run_server(state: Arc) -> Result<(), Box Option<(std::net::SocketAddr, SessionManagerConfig)> { + let addr_str = env::var("CLAUDE_CLI_LISTEN_ADDR").ok()?; + let addr: std::net::SocketAddr = match addr_str.parse() { + Ok(a) => a, + Err(err) => { + warn!( + value = %addr_str, + error = %err, + "invalid CLAUDE_CLI_LISTEN_ADDR — claude-cli bridge disabled" + ); + return None; + } + }; + let binary = env::var("CLAUDE_CLI_BIN").unwrap_or_else(|_| "claude".to_string()); + let permission_mode = + env::var("CLAUDE_CLI_PERMISSION_MODE").unwrap_or_else(|_| "bypassPermissions".to_string()); + let 
session_ttl = env::var("CLAUDE_CLI_SESSION_TTL_SECS") + .ok() + .and_then(|s| s.parse::().ok()) + .map(Duration::from_secs) + .unwrap_or_else(|| Duration::from_secs(600)); + let watchdog = env::var("CLAUDE_CLI_WATCHDOG_SECS") + .ok() + .and_then(|s| s.parse::().ok()) + .map(Duration::from_secs) + .unwrap_or_else(|| Duration::from_secs(120)); + let max_sessions = env::var("CLAUDE_CLI_MAX_SESSIONS") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(claude_cli::session::DEFAULT_MAX_SESSIONS); + Some(( + addr, + SessionManagerConfig { + max_sessions, + process: ClaudeCliConfig { + binary, + permission_mode, + session_ttl, + watchdog, + }, + }, + )) +} + // --------------------------------------------------------------------------- // Entry point // --------------------------------------------------------------------------- @@ -586,5 +641,31 @@ async fn main() -> Result<(), Box> { bs_metrics::init(); info!("loaded plano_config.yaml"); let state = Arc::new(init_app_state(&config).await?); - run_server(state).await + + // Optional claude-cli bridge listener. Started iff CLAUDE_CLI_LISTEN_ADDR + // is set in the environment (the Python CLI sets this when it detects a + // `model: claude-cli/*` provider entry). + let bridge_handle = if let Some((addr, cfg)) = claude_cli_config_from_env() { + let manager = SessionManager::new(cfg); + let shutdown = async { + let _ = tokio::signal::ctrl_c().await; + }; + Some(tokio::spawn(async move { + if let Err(err) = claude_cli::run_listener(addr, manager, shutdown).await { + warn!(error = ?err, "claude-cli bridge listener exited with error"); + } + })) + } else { + None + }; + + let result = run_server(state).await; + + if let Some(handle) = bridge_handle { + // Ctrl-C already triggered the bridge's own shutdown; join briefly to + // give in-flight session drains a chance to finish. 
+ let _ = tokio::time::timeout(Duration::from_secs(5), handle).await; + } + + result } diff --git a/crates/brightstaff/tests/claude_cli_bridge.rs b/crates/brightstaff/tests/claude_cli_bridge.rs new file mode 100644 index 000000000..6cf97258c --- /dev/null +++ b/crates/brightstaff/tests/claude_cli_bridge.rs @@ -0,0 +1,190 @@ +//! Integration test for the claude-cli bridge. Spins up the listener with a +//! fake `claude` shell script that emits a canned NDJSON sequence, then +//! verifies both the streaming SSE and non-streaming JSON code paths produce +//! the expected Anthropic Messages output. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use brightstaff::handlers::claude_cli::{ + self, ClaudeCliConfig, SessionManager, SessionManagerConfig, +}; +use serde_json::{json, Value}; +use tokio::net::TcpListener; +use tokio::sync::oneshot; + +fn fake_claude_path() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join("fake_claude.sh") +} + +async fn pick_free_addr() -> std::net::SocketAddr { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + drop(listener); + addr +} + +struct BridgeFixture { + addr: std::net::SocketAddr, + shutdown: Option>, + handle: Option>, +} + +impl BridgeFixture { + async fn start() -> Self { + let addr = pick_free_addr().await; + let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>(); + + let manager = SessionManager::new(SessionManagerConfig { + max_sessions: 4, + process: ClaudeCliConfig { + binary: fake_claude_path().to_string_lossy().to_string(), + permission_mode: "bypassPermissions".to_string(), + session_ttl: Duration::from_secs(60), + watchdog: Duration::from_secs(5), + }, + }); + + let manager_for_listener = Arc::clone(&manager); + let handle = tokio::spawn(async move { + let shutdown = async move { + let _ = shutdown_rx.await; + }; + if let Err(err) = claude_cli::run_listener(addr, 
manager_for_listener, shutdown).await { + eprintln!("listener exited with error: {err}"); + } + }); + + // Wait for the listener to bind. Loop until we can connect. + for _ in 0..50 { + if tokio::net::TcpStream::connect(addr).await.is_ok() { + break; + } + tokio::time::sleep(Duration::from_millis(20)).await; + } + + Self { + addr, + shutdown: Some(shutdown_tx), + handle: Some(handle), + } + } + + async fn stop(mut self) { + if let Some(tx) = self.shutdown.take() { + let _ = tx.send(()); + } + if let Some(h) = self.handle.take() { + let _ = tokio::time::timeout(Duration::from_secs(3), h).await; + } + } +} + +fn anthropic_request(stream: bool) -> Value { + json!({ + "model": "claude-cli/sonnet", + "max_tokens": 64, + "stream": stream, + "messages": [ + {"role": "user", "content": "say hi"} + ] + }) +} + +#[tokio::test] +async fn streaming_request_emits_anthropic_sse() { + let fixture = BridgeFixture::start().await; + let url = format!("http://{}/v1/messages", fixture.addr); + + let client = reqwest::Client::new(); + let resp = client + .post(&url) + .json(&anthropic_request(true)) + .send() + .await + .expect("send request"); + assert_eq!(resp.status(), 200); + let ct = resp + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + assert!( + ct.starts_with("text/event-stream"), + "expected text/event-stream, got {ct}" + ); + let body = resp.text().await.expect("read body"); + + // SSE event names should mirror Anthropic's wire format, in order. + let events: Vec<&str> = body + .lines() + .filter_map(|l| l.strip_prefix("event: ")) + .collect(); + assert_eq!( + events, + vec![ + "message_start", + "content_block_start", + "content_block_delta", + "content_block_delta", + "content_block_stop", + "message_delta", + "message_stop", + ], + "unexpected SSE event sequence:\n{body}" + ); + + // The two text deltas should reconstruct "Hello, world!". 
+ let mut combined = String::new(); + for line in body.lines() { + if let Some(payload) = line.strip_prefix("data: ") { + if let Ok(v) = serde_json::from_str::(payload) { + if v.get("type").and_then(|t| t.as_str()) == Some("content_block_delta") { + if let Some(text) = v + .get("delta") + .and_then(|d| d.get("text")) + .and_then(|t| t.as_str()) + { + combined.push_str(text); + } + } + } + } + } + assert_eq!(combined, "Hello, world!"); + + fixture.stop().await; +} + +#[tokio::test] +async fn non_streaming_request_returns_messages_response() { + let fixture = BridgeFixture::start().await; + let url = format!("http://{}/v1/messages", fixture.addr); + + let client = reqwest::Client::new(); + let resp = client + .post(&url) + .json(&anthropic_request(false)) + .send() + .await + .expect("send request"); + assert_eq!(resp.status(), 200); + let body: Value = resp.json().await.expect("parse json"); + + assert_eq!(body["type"], "message"); + assert_eq!(body["role"], "assistant"); + assert_eq!(body["stop_reason"], "end_turn"); + assert_eq!(body["usage"]["input_tokens"], 3); + assert_eq!(body["usage"]["output_tokens"], 4); + let content = body["content"].as_array().expect("content array"); + assert_eq!(content.len(), 1); + assert_eq!(content[0]["type"], "text"); + assert_eq!(content[0]["text"], "Hello, world!"); + + fixture.stop().await; +} diff --git a/crates/brightstaff/tests/fixtures/fake_claude.sh b/crates/brightstaff/tests/fixtures/fake_claude.sh new file mode 100755 index 000000000..de27edaf4 --- /dev/null +++ b/crates/brightstaff/tests/fixtures/fake_claude.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Stand-in for the real `claude` CLI used by the brightstaff integration test. +# Reads stdin (so it does not exit early when the bridge writes the user +# JSONL turn) and emits a canned `--output-format stream-json` NDJSON +# sequence that mirrors a one-turn "Hello, world!" response. 
#
# All CLI flags are accepted and ignored; only the NDJSON output matters for
# the bridge-side translation.
set -euo pipefail

# Drain any stdin the parent writes so it does not see EPIPE.
#
# In a non-interactive shell (no job control) a command started with `&` gets
# /dev/null as its default stdin, so a plain background `cat` would read
# nothing and exit at once. Duplicate the real stdin onto fd 3 first and point
# the background reader at that descriptor explicitly.
exec 3<&0
cat <&3 > /dev/null &
DRAIN_PID=$!
trap 'kill ${DRAIN_PID} 2>/dev/null || true' EXIT

cat <<'EOF'
{"type":"system","subtype":"init","session_id":"fake-session","model":"sonnet","cwd":"/tmp","tools":[]}
{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_fake","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"output_tokens":0}}}}
{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":", world!"}}}
{"type":"stream_event","event":{"type":"content_block_stop","index":0}}
{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":4}}}
{"type":"stream_event","event":{"type":"message_stop"}}
{"type":"result","subtype":"success","is_error":false,"duration_ms":12,"num_turns":1,"result":"Hello, world!","total_cost_usd":0.0001,"usage":{"input_tokens":3,"output_tokens":4},"session_id":"fake-session"}
EOF
+ #[serde(rename = "claude-cli")] + ClaudeCli, } impl Display for LlmProviderType { @@ -425,6 +429,7 @@ impl Display for LlmProviderType { LlmProviderType::DigitalOcean => write!(f, "digitalocean"), LlmProviderType::Vercel => write!(f, "vercel"), LlmProviderType::OpenRouter => write!(f, "openrouter"), + LlmProviderType::ClaudeCli => write!(f, "claude-cli"), } } } @@ -772,6 +777,7 @@ mod test { for (yaml_value, expected) in [ ("vercel", LlmProviderType::Vercel), ("openrouter", LlmProviderType::OpenRouter), + ("claude-cli", LlmProviderType::ClaudeCli), ] { let parsed: LlmProviderType = serde_yaml::from_str(yaml_value).expect("variant should deserialize"); diff --git a/crates/hermesllm/src/apis/claude_cli.rs b/crates/hermesllm/src/apis/claude_cli.rs new file mode 100644 index 000000000..0c107a888 --- /dev/null +++ b/crates/hermesllm/src/apis/claude_cli.rs @@ -0,0 +1,955 @@ +//! Translation between Anthropic Messages API and Claude Code CLI's +//! `--output-format stream-json` / `--input-format stream-json` wire format. +//! +//! Claude Code CLI is invoked as a subprocess by `brightstaff` with flags such +//! as `claude -p --output-format stream-json --input-format stream-json +//! --include-partial-messages --verbose`. Each line on stdout is one JSON event +//! (NDJSON), and each line on stdin is a user-message JSON. This module owns +//! the pure (no-I/O) types and conversions; the runtime layer in brightstaff +//! does the actual spawning and streaming. 
+ +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use serde_with::skip_serializing_none; +use thiserror::Error; +use uuid::Uuid; + +use crate::apis::anthropic::{ + MessagesContentBlock, MessagesContentDelta, MessagesMessage, MessagesMessageContent, + MessagesMessageDelta, MessagesRequest, MessagesResponse, MessagesRole, MessagesStopReason, + MessagesStreamEvent, MessagesStreamMessage, MessagesSystemPrompt, MessagesUsage, +}; + +/// Errors produced by translation between Anthropic Messages and Claude Code +/// stream-json. +#[derive(Debug, Error)] +pub enum ClaudeCliTranslationError { + #[error("Claude CLI returned an error: {message}")] + CliError { message: String }, + #[error("Failed to serialize stdin payload: {0}")] + SerializeStdin(#[from] serde_json::Error), + #[error("Claude CLI stream ended before a terminal `result` event")] + UnexpectedEnd, +} + +// --------------------------------------------------------------------------- +// Wire types — output (Claude CLI -> us) +// --------------------------------------------------------------------------- + +/// One line of NDJSON emitted on stdout by `claude -p --output-format +/// stream-json`. The CLI tags variants with a top-level `type` field, and +/// `system`/`result` carry an additional `subtype`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ClaudeCliEvent { + /// `type=system` events. The actual classification lives in `subtype` + /// (e.g. `init`, `api_retry`, `rate_limit_event`). We keep the raw fields + /// rather than enumerating subtypes so a new CLI release that adds a + /// subtype does not break parsing. + System { + #[serde(default)] + subtype: Option, + #[serde(default)] + session_id: Option, + #[serde(default)] + model: Option, + #[serde(default)] + cwd: Option, + #[serde(flatten)] + extra: Value, + }, + /// A complete assistant message (emitted after the corresponding + /// `stream_event` deltas finish). 
Useful for non-streaming consumers. + Assistant { message: ClaudeCliAssistantMessage }, + /// A complete user message echoed back (when `--replay-user-messages` is + /// set). We currently ignore these in translation but keep the variant so + /// stray events do not cause deserialization failures. + User { + #[serde(default)] + message: Value, + }, + /// Wrapped Anthropic SSE event. The CLI re-emits the raw streaming-API + /// shape here when `--include-partial-messages` is enabled. + StreamEvent { event: MessagesStreamEvent }, + /// Terminal event marking the end of one CLI turn. `is_error == true` + /// means the underlying API call failed; `result` typically holds the + /// final assistant text or an error message. + Result { + #[serde(default)] + subtype: Option, + #[serde(default)] + is_error: bool, + #[serde(default)] + duration_ms: Option, + #[serde(default)] + num_turns: Option, + #[serde(default)] + result: Option, + #[serde(default)] + total_cost_usd: Option, + #[serde(default)] + usage: Option, + #[serde(default)] + session_id: Option, + }, + /// Catch-all for events the CLI may add in the future. We surface them in + /// logs but do not translate them to Anthropic events. + #[serde(other)] + Unknown, +} + +/// Subset of the Anthropic message shape the CLI emits inside `assistant` +/// events. We keep `content` as `Value` so we can decode text + tool_use +/// blocks without re-deriving every Anthropic content variant here. +#[skip_serializing_none] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClaudeCliAssistantMessage { + pub id: Option, + #[serde(default)] + pub model: Option, + #[serde(default)] + pub role: Option, + #[serde(default)] + pub content: Vec, + #[serde(default)] + pub stop_reason: Option, + #[serde(default)] + pub stop_sequence: Option, + #[serde(default)] + pub usage: Option, +} + +/// The CLI's `assistant.message.content[]` entries are a subset of Anthropic's +/// content blocks. 
We deserialize them into `MessagesContentBlock` directly +/// where possible and fall back to a tagged enum for the few fields we care +/// about explicitly (text + tool_use). +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ClaudeCliContentBlock { + /// Anthropic-shaped content block (text, tool_use, thinking, ...). + Anthropic(MessagesContentBlock), + /// Anything we do not recognize is preserved as raw JSON so we can still + /// surface it in the `result` aggregation. + Unknown(Value), +} + +#[skip_serializing_none] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ClaudeCliUsage { + #[serde(default)] + pub input_tokens: Option, + #[serde(default)] + pub output_tokens: Option, + #[serde(default)] + pub cache_creation_input_tokens: Option, + #[serde(default)] + pub cache_read_input_tokens: Option, +} + +impl From for MessagesUsage { + fn from(u: ClaudeCliUsage) -> Self { + MessagesUsage { + input_tokens: u.input_tokens.unwrap_or(0), + output_tokens: u.output_tokens.unwrap_or(0), + cache_creation_input_tokens: u.cache_creation_input_tokens, + cache_read_input_tokens: u.cache_read_input_tokens, + } + } +} + +// --------------------------------------------------------------------------- +// Wire types — input (us -> Claude CLI) +// --------------------------------------------------------------------------- + +/// One line of NDJSON written to the CLI's stdin when invoked with +/// `--input-format stream-json`. +#[derive(Debug, Clone, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ClaudeCliInputEvent { + User { + message: ClaudeCliUserMessage, + /// The session id assigned by the CLI on first turn. Optional on the + /// first message; required (and must match) on subsequent turns. 
+ #[serde(skip_serializing_if = "Option::is_none")] + session_id: Option, + }, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ClaudeCliUserMessage { + pub role: &'static str, + pub content: Value, +} + +// --------------------------------------------------------------------------- +// Conversions +// --------------------------------------------------------------------------- + +/// Map a `MessagesRequest` into the JSONL payload that should be written to +/// the CLI's stdin. Returns one event per user turn, in order, so callers can +/// either replay the full conversation on first spawn or send only the latest +/// turn for a hot session. +/// +/// `session_id` (when set) is attached to every event so the CLI can verify +/// the turn belongs to the expected session. +pub fn messages_request_to_stdin_payload( + req: &MessagesRequest, + session_id: Option<&str>, +) -> Result, ClaudeCliTranslationError> { + let mut out = Vec::new(); + for msg in &req.messages { + if msg.role != MessagesRole::User { + // Assistant turns are managed by the CLI internally; we skip them. + continue; + } + let content = message_content_to_cli_value(&msg.content); + out.push(ClaudeCliInputEvent::User { + message: ClaudeCliUserMessage { + role: "user", + content, + }, + session_id: session_id.map(|s| s.to_string()), + }); + } + Ok(out) +} + +/// Build the `--append-system-prompt` value that should be passed when +/// spawning the CLI for this request. Returns `None` when the request has no +/// system prompt. +pub fn extract_system_prompt(req: &MessagesRequest) -> Option { + req.system.as_ref().map(|s| match s { + MessagesSystemPrompt::Single(text) => text.clone(), + MessagesSystemPrompt::Blocks(blocks) => blocks + .iter() + .filter_map(|b| match b { + MessagesContentBlock::Text { text, .. 
} => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n"), + }) +} + +fn message_content_to_cli_value(content: &MessagesMessageContent) -> Value { + match content { + MessagesMessageContent::Single(s) => Value::String(s.clone()), + MessagesMessageContent::Blocks(blocks) => { + // Preserve the structured block array so tool_result / image + // blocks survive intact across the stdin boundary. + serde_json::to_value(blocks).unwrap_or_else(|_| Value::Array(vec![])) + } + } +} + +/// Translate a single CLI event into a corresponding Anthropic +/// `MessagesStreamEvent`, when one exists. Returns `None` for events that +/// have no SSE counterpart (CLI-internal `system` notifications, terminal +/// `result`, unrecognized variants, ...). +pub fn cli_event_to_messages_stream_event(ev: &ClaudeCliEvent) -> Option { + match ev { + ClaudeCliEvent::StreamEvent { event } => Some(event.clone()), + _ => None, + } +} + +/// Aggregate a sequence of CLI events into a single non-streaming +/// `MessagesResponse`. Used by the bridge when the client did not request +/// streaming. +/// +/// The terminal `result` event is required: if the iterator ends without one, +/// we return [`ClaudeCliTranslationError::UnexpectedEnd`]. +pub fn collect_to_messages_response( + model: &str, + events: I, +) -> Result +where + I: IntoIterator, +{ + let mut content_blocks: Vec = Vec::new(); + // Accumulate per-index text deltas + tool-use input deltas as the CLI + // emits content_block_start -> content_block_delta(s) -> content_block_stop. 
+ let mut text_accum: std::collections::HashMap = std::collections::HashMap::new(); + let mut tool_accum: std::collections::HashMap = + std::collections::HashMap::new(); + let mut block_order: Vec<(u32, BlockKind)> = Vec::new(); + let mut stop_reason = MessagesStopReason::EndTurn; + let mut stop_sequence: Option = None; + let mut usage = MessagesUsage { + input_tokens: 0, + output_tokens: 0, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }; + let mut id = String::new(); + let mut model_out = model.to_string(); + let mut last_assistant_message: Option = None; + let mut saw_result = false; + let mut error_message: Option = None; + + for ev in events { + match ev { + ClaudeCliEvent::StreamEvent { event } => match event { + MessagesStreamEvent::MessageStart { message } => { + if id.is_empty() { + id = message.id.clone(); + } + if !message.model.is_empty() { + model_out = message.model.clone(); + } + usage = message.usage.clone(); + } + MessagesStreamEvent::ContentBlockStart { + index, + content_block, + } => match content_block { + MessagesContentBlock::Text { text, .. } => { + text_accum.insert(index, text); + block_order.push((index, BlockKind::Text)); + } + MessagesContentBlock::ToolUse { + id: tool_id, name, .. + } => { + // Anthropic streaming always starts a tool_use block + // with an empty `input` placeholder (`{}` or `null`); + // the real arguments arrive via `input_json_delta`s. + // Always start with an empty buffer so deltas + // assemble into valid JSON. + tool_accum.insert(index, (tool_id, name, String::new())); + block_order.push((index, BlockKind::ToolUse)); + } + other => { + // Unknown block kind — preserve verbatim by pushing it + // immediately. We do not expect deltas for this index. 
+ content_blocks.push(other); + } + }, + MessagesStreamEvent::ContentBlockDelta { index, delta } => match delta { + MessagesContentDelta::TextDelta { text } => { + text_accum.entry(index).or_default().push_str(&text); + } + MessagesContentDelta::InputJsonDelta { partial_json } => { + if let Some((_, _, buf)) = tool_accum.get_mut(&index) { + buf.push_str(&partial_json); + } + } + // Thinking/signature deltas are surfaced to streaming + // clients but dropped from the non-streaming aggregate. + _ => {} + }, + MessagesStreamEvent::ContentBlockStop { .. } => {} + MessagesStreamEvent::MessageDelta { + delta, + usage: msg_usage, + } => { + let MessagesMessageDelta { + stop_reason: sr, + stop_sequence: ss, + } = delta; + stop_reason = sr; + stop_sequence = ss; + // The MessageDelta usage carries final output_tokens. + usage.output_tokens = msg_usage.output_tokens; + } + MessagesStreamEvent::MessageStop | MessagesStreamEvent::Ping => {} + }, + ClaudeCliEvent::Assistant { message } => { + last_assistant_message = Some(message); + } + ClaudeCliEvent::Result { + is_error, + result, + usage: result_usage, + .. + } => { + saw_result = true; + if is_error { + error_message = Some(result.unwrap_or_else(|| "Claude CLI failed".to_string())); + } + if let Some(u) = result_usage { + let merged: MessagesUsage = u.into(); + if merged.input_tokens > 0 { + usage.input_tokens = merged.input_tokens; + } + if merged.output_tokens > 0 { + usage.output_tokens = merged.output_tokens; + } + if merged.cache_creation_input_tokens.is_some() { + usage.cache_creation_input_tokens = merged.cache_creation_input_tokens; + } + if merged.cache_read_input_tokens.is_some() { + usage.cache_read_input_tokens = merged.cache_read_input_tokens; + } + } + } + ClaudeCliEvent::System { .. } + | ClaudeCliEvent::User { .. 
} + | ClaudeCliEvent::Unknown => {} + } + } + + if let Some(msg) = error_message { + return Err(ClaudeCliTranslationError::CliError { message: msg }); + } + if !saw_result { + return Err(ClaudeCliTranslationError::UnexpectedEnd); + } + + // Materialize accumulated blocks in the order they were started. + let mut sorted_indices = block_order.clone(); + sorted_indices.sort_by_key(|(idx, _)| *idx); + for (idx, kind) in sorted_indices { + match kind { + BlockKind::Text => { + if let Some(text) = text_accum.remove(&idx) { + content_blocks.push(MessagesContentBlock::Text { + text, + cache_control: None, + }); + } + } + BlockKind::ToolUse => { + if let Some((tool_id, name, raw_input)) = tool_accum.remove(&idx) { + let input_value = if raw_input.is_empty() { + Value::Object(Default::default()) + } else { + serde_json::from_str(&raw_input) + .unwrap_or_else(|_| Value::String(raw_input)) + }; + content_blocks.push(MessagesContentBlock::ToolUse { + id: tool_id, + name, + input: input_value, + cache_control: None, + }); + } + } + } + } + + // If the streaming events did not include any content but the CLI sent a + // final `assistant` message (common for short responses), use that as the + // body of the response. 
+ if content_blocks.is_empty() { + if let Some(msg) = last_assistant_message { + for block in msg.content { + if let ClaudeCliContentBlock::Anthropic(b) = block { + content_blocks.push(b); + } + } + if id.is_empty() { + if let Some(msg_id) = msg.id { + id = msg_id; + } + } + if let Some(m) = msg.model { + if !m.is_empty() { + model_out = m; + } + } + if let Some(u) = msg.usage { + let merged: MessagesUsage = u.into(); + if usage.input_tokens == 0 { + usage.input_tokens = merged.input_tokens; + } + if usage.output_tokens == 0 { + usage.output_tokens = merged.output_tokens; + } + if usage.cache_creation_input_tokens.is_none() { + usage.cache_creation_input_tokens = merged.cache_creation_input_tokens; + } + if usage.cache_read_input_tokens.is_none() { + usage.cache_read_input_tokens = merged.cache_read_input_tokens; + } + } + } + } + + if id.is_empty() { + id = format!("msg_cli_{}", Uuid::new_v4().simple()); + } + + Ok(MessagesResponse { + id, + obj_type: "message".to_string(), + role: MessagesRole::Assistant, + content: content_blocks, + model: model_out, + stop_reason, + stop_sequence, + usage, + container: None, + }) +} + +#[derive(Clone, Copy)] +enum BlockKind { + Text, + ToolUse, +} + +/// Build an Anthropic-style error envelope JSON for a CLI-level failure. The +/// brightstaff bridge serializes this and returns it with a 502/500 status so +/// the existing `llm_gateway` error handling sees a familiar shape. +pub fn cli_error_to_anthropic_error_body(message: &str) -> Value { + json!({ + "type": "error", + "error": { + "type": "claude_cli_error", + "message": message, + } + }) +} + +/// Synthesize a `message_start` event for streaming clients in cases where +/// the CLI did not emit one (it usually does, but very small turns can skip +/// straight to `assistant`/`result`). 
+pub fn synthetic_message_start(model: &str, session_id: Option<&str>) -> MessagesStreamEvent { + let id = session_id + .map(|s| format!("msg_cli_{}", s)) + .unwrap_or_else(|| format!("msg_cli_{}", Uuid::new_v4().simple())); + MessagesStreamEvent::MessageStart { + message: MessagesStreamMessage { + id, + obj_type: "message".to_string(), + role: MessagesRole::Assistant, + content: Vec::new(), + model: model.to_string(), + stop_reason: None, + stop_sequence: None, + usage: MessagesUsage { + input_tokens: 0, + output_tokens: 0, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }, + }, + } +} + +/// Convenience: parse one NDJSON line into a `ClaudeCliEvent`. Whitespace-only +/// lines deserialize to `None` so callers can simply skip them. +pub fn parse_ndjson_line(line: &str) -> Option> { + let trimmed = line.trim(); + if trimmed.is_empty() { + return None; + } + Some(serde_json::from_str(trimmed)) +} + +// Unused helper to keep MessagesMessage in scope in case future tool_result +// translation needs to reach into the message shape directly. 
+#[allow(dead_code)] +fn _touch_messages_message_type(_m: MessagesMessage) {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::apis::anthropic::{MessagesMessage, MessagesMessageContent}; + + fn user_request(text: &str) -> MessagesRequest { + MessagesRequest { + model: "claude-cli/sonnet".to_string(), + messages: vec![MessagesMessage { + role: MessagesRole::User, + content: MessagesMessageContent::Single(text.to_string()), + }], + max_tokens: 1024, + container: None, + mcp_servers: None, + system: None, + metadata: None, + service_tier: None, + thinking: None, + temperature: None, + top_p: None, + top_k: None, + stream: Some(true), + stop_sequences: None, + tools: None, + tool_choice: None, + } + } + + #[test] + fn parses_system_init_event() { + let line = r#"{"type":"system","subtype":"init","session_id":"s1","model":"sonnet","cwd":"/tmp","tools":[]}"#; + let parsed = parse_ndjson_line(line).expect("non-empty").expect("ok"); + match parsed { + ClaudeCliEvent::System { + subtype, + session_id, + model, + .. 
+ } => { + assert_eq!(subtype.as_deref(), Some("init")); + assert_eq!(session_id.as_deref(), Some("s1")); + assert_eq!(model.as_deref(), Some("sonnet")); + } + other => panic!("expected System, got {other:?}"), + } + } + + #[test] + fn parses_text_stream_event() { + let line = r#"{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}}"#; + let parsed = parse_ndjson_line(line).unwrap().unwrap(); + let translated = cli_event_to_messages_stream_event(&parsed) + .expect("text_delta should translate to MessagesStreamEvent"); + match translated { + MessagesStreamEvent::ContentBlockDelta { index, delta } => { + assert_eq!(index, 0); + match delta { + MessagesContentDelta::TextDelta { text } => assert_eq!(text, "hi"), + other => panic!("expected TextDelta, got {other:?}"), + } + } + other => panic!("expected ContentBlockDelta, got {other:?}"), + } + } + + #[test] + fn parses_result_success_event() { + let line = r#"{"type":"result","subtype":"success","is_error":false,"duration_ms":12,"num_turns":1,"result":"hi","total_cost_usd":0.001,"usage":{"input_tokens":4,"output_tokens":2},"session_id":"s1"}"#; + let parsed = parse_ndjson_line(line).unwrap().unwrap(); + match parsed { + ClaudeCliEvent::Result { + is_error, + result, + usage, + .. 
+ } => { + assert!(!is_error); + assert_eq!(result.as_deref(), Some("hi")); + assert_eq!(usage.unwrap().output_tokens, Some(2)); + } + other => panic!("expected Result, got {other:?}"), + } + } + + #[test] + fn unknown_event_type_does_not_break_parser() { + let line = r#"{"type":"future_event_kind","data":{"foo":"bar"},"another":42}"#; + let parsed = parse_ndjson_line(line).unwrap().unwrap(); + assert!(matches!(parsed, ClaudeCliEvent::Unknown)); + } + + #[test] + fn stdin_payload_skips_assistant_turns() { + let mut req = user_request("hello"); + req.messages.push(MessagesMessage { + role: MessagesRole::Assistant, + content: MessagesMessageContent::Single("hi back".to_string()), + }); + req.messages.push(MessagesMessage { + role: MessagesRole::User, + content: MessagesMessageContent::Single("how are you?".to_string()), + }); + let payload = messages_request_to_stdin_payload(&req, Some("s1")).unwrap(); + assert_eq!(payload.len(), 2); + for ev in &payload { + match ev { + ClaudeCliInputEvent::User { + message, + session_id, + } => { + assert_eq!(message.role, "user"); + assert_eq!(session_id.as_deref(), Some("s1")); + } + } + } + } + + #[test] + fn collect_to_messages_response_aggregates_text() { + let events = vec![ + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageStart { + message: MessagesStreamMessage { + id: "msg_1".to_string(), + obj_type: "message".to_string(), + role: MessagesRole::Assistant, + content: vec![], + model: "claude-sonnet-4-6".to_string(), + stop_reason: None, + stop_sequence: None, + usage: MessagesUsage { + input_tokens: 7, + output_tokens: 0, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }, + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockStart { + index: 0, + content_block: MessagesContentBlock::Text { + text: String::new(), + cache_control: None, + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockDelta { + index: 0, + 
delta: MessagesContentDelta::TextDelta { + text: "Hello ".to_string(), + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockDelta { + index: 0, + delta: MessagesContentDelta::TextDelta { + text: "world".to_string(), + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockStop { index: 0 }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageDelta { + delta: MessagesMessageDelta { + stop_reason: MessagesStopReason::EndTurn, + stop_sequence: None, + }, + usage: MessagesUsage { + input_tokens: 0, + output_tokens: 12, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageStop, + }, + ClaudeCliEvent::Result { + subtype: Some("success".to_string()), + is_error: false, + duration_ms: Some(123), + num_turns: Some(1), + result: Some("Hello world".to_string()), + total_cost_usd: Some(0.001), + usage: Some(ClaudeCliUsage { + input_tokens: Some(7), + output_tokens: Some(12), + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }), + session_id: Some("s1".to_string()), + }, + ]; + + let resp = collect_to_messages_response("claude-cli/sonnet", events).unwrap(); + assert_eq!(resp.id, "msg_1"); + assert_eq!(resp.model, "claude-sonnet-4-6"); + assert_eq!(resp.usage.input_tokens, 7); + assert_eq!(resp.usage.output_tokens, 12); + assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn)); + match &resp.content[..] { + [MessagesContentBlock::Text { text, .. 
}] => assert_eq!(text, "Hello world"), + other => panic!("expected single Text block, got {other:?}"), + } + } + + #[test] + fn collect_to_messages_response_aggregates_tool_use() { + let events = vec![ + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageStart { + message: MessagesStreamMessage { + id: "msg_2".to_string(), + obj_type: "message".to_string(), + role: MessagesRole::Assistant, + content: vec![], + model: "sonnet".to_string(), + stop_reason: None, + stop_sequence: None, + usage: MessagesUsage { + input_tokens: 1, + output_tokens: 0, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }, + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockStart { + index: 0, + content_block: MessagesContentBlock::ToolUse { + id: "toolu_1".to_string(), + name: "get_weather".to_string(), + input: Value::Null, + cache_control: None, + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockDelta { + index: 0, + delta: MessagesContentDelta::InputJsonDelta { + partial_json: "{\"loc\":\"".to_string(), + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockDelta { + index: 0, + delta: MessagesContentDelta::InputJsonDelta { + partial_json: "SF\"}".to_string(), + }, + }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::ContentBlockStop { index: 0 }, + }, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageDelta { + delta: MessagesMessageDelta { + stop_reason: MessagesStopReason::ToolUse, + stop_sequence: None, + }, + usage: MessagesUsage { + input_tokens: 0, + output_tokens: 5, + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }, + }, + }, + ClaudeCliEvent::Result { + subtype: Some("success".to_string()), + is_error: false, + duration_ms: None, + num_turns: Some(1), + result: None, + total_cost_usd: None, + usage: None, + session_id: None, + }, + ]; + + let resp = 
collect_to_messages_response("sonnet", events).unwrap(); + assert!(matches!(resp.stop_reason, MessagesStopReason::ToolUse)); + match &resp.content[..] { + [MessagesContentBlock::ToolUse { + id, name, input, .. + }] => { + assert_eq!(id, "toolu_1"); + assert_eq!(name, "get_weather"); + assert_eq!(input["loc"], "SF"); + } + other => panic!("expected ToolUse block, got {other:?}"), + } + } + + #[test] + fn collect_to_messages_response_propagates_cli_error() { + let events = vec![ClaudeCliEvent::Result { + subtype: Some("error".to_string()), + is_error: true, + duration_ms: Some(5), + num_turns: Some(0), + result: Some("auth failed".to_string()), + total_cost_usd: None, + usage: None, + session_id: None, + }]; + let err = collect_to_messages_response("sonnet", events).unwrap_err(); + match err { + ClaudeCliTranslationError::CliError { message } => { + assert!(message.contains("auth failed")); + } + other => panic!("expected CliError, got {other:?}"), + } + } + + #[test] + fn collect_to_messages_response_unexpected_end() { + let events: Vec = vec![ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::Ping, + }]; + let err = collect_to_messages_response("sonnet", events).unwrap_err(); + assert!(matches!(err, ClaudeCliTranslationError::UnexpectedEnd)); + } + + #[test] + fn collect_to_messages_response_uses_assistant_when_no_deltas() { + let assistant_msg = ClaudeCliAssistantMessage { + id: Some("msg_3".to_string()), + model: Some("sonnet".to_string()), + role: Some("assistant".to_string()), + content: vec![ClaudeCliContentBlock::Anthropic( + MessagesContentBlock::Text { + text: "ok".to_string(), + cache_control: None, + }, + )], + stop_reason: Some("end_turn".to_string()), + stop_sequence: None, + usage: Some(ClaudeCliUsage { + input_tokens: Some(2), + output_tokens: Some(1), + cache_creation_input_tokens: None, + cache_read_input_tokens: None, + }), + }; + let events = vec![ + ClaudeCliEvent::Assistant { + message: assistant_msg, + }, + ClaudeCliEvent::Result { + 
subtype: Some("success".to_string()), + is_error: false, + duration_ms: None, + num_turns: Some(1), + result: None, + total_cost_usd: None, + usage: None, + session_id: None, + }, + ]; + let resp = collect_to_messages_response("sonnet", events).unwrap(); + assert_eq!(resp.id, "msg_3"); + assert_eq!(resp.usage.input_tokens, 2); + assert_eq!(resp.usage.output_tokens, 1); + match &resp.content[..] { + [MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "ok"), + other => panic!("expected Text, got {other:?}"), + } + } + + #[test] + fn extract_system_prompt_blocks_join_text() { + let req = MessagesRequest { + system: Some(MessagesSystemPrompt::Blocks(vec![ + MessagesContentBlock::Text { + text: "line 1".to_string(), + cache_control: None, + }, + MessagesContentBlock::Text { + text: "line 2".to_string(), + cache_control: None, + }, + ])), + ..user_request("ignored") + }; + assert_eq!( + extract_system_prompt(&req).as_deref(), + Some("line 1\nline 2") + ); + } + + #[test] + fn tool_result_content_round_trips_through_translation() { + // Sanity-check that ToolResultContent (used by future tool_result + // translation) stays linkable as the surface evolves. 
+ use crate::apis::anthropic::ToolResultContent; + let _ = ToolResultContent::Text("noop".to_string()); + } +} diff --git a/crates/hermesllm/src/apis/mod.rs b/crates/hermesllm/src/apis/mod.rs index ea0563926..368ead2da 100644 --- a/crates/hermesllm/src/apis/mod.rs +++ b/crates/hermesllm/src/apis/mod.rs @@ -1,5 +1,6 @@ pub mod amazon_bedrock; pub mod anthropic; +pub mod claude_cli; pub mod openai; pub mod openai_responses; pub mod streaming_shapes; diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml index 2e9e0a9b4..d4387b511 100644 --- a/crates/hermesllm/src/bin/provider_models.yaml +++ b/crates/hermesllm/src/bin/provider_models.yaml @@ -92,6 +92,19 @@ providers: - mistralai/mistral-embed - mistralai/codestral-embed - mistralai/codestral-embed-2505 + claude-cli: + # Family aliases (always resolve to the latest model in the family). + - claude-cli/sonnet + - claude-cli/opus + - claude-cli/haiku + # Dated full ids (sourced from the Claude Code model configuration article; + # refresh by re-fetching that doc whenever Anthropic ships new models). + - claude-cli/claude-opus-4-7 + - claude-cli/claude-sonnet-4-6 + - claude-cli/claude-opus-4-6 + - claude-cli/claude-opus-4-5-20251101 + - claude-cli/claude-haiku-4-5-20251001 + - claude-cli/claude-sonnet-4-5-20250929 anthropic: - anthropic/claude-sonnet-4-6 - anthropic/claude-opus-4-6 diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index 4fa7d19d1..3ad0a2cb3 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -48,6 +48,11 @@ pub enum ProviderId { DigitalOcean, Vercel, OpenRouter, + /// Claude Code CLI invoked as a local subprocess by brightstaff. On the + /// wire it speaks the Anthropic Messages API exactly like + /// [`ProviderId::Anthropic`]; the difference is that no Anthropic API key + /// or network call is involved — the local `claude` binary is. 
+ ClaudeCli, } impl TryFrom<&str> for ProviderId { @@ -81,6 +86,8 @@ impl TryFrom<&str> for ProviderId { "do_ai" => Ok(ProviderId::DigitalOcean), // alias "vercel" => Ok(ProviderId::Vercel), "openrouter" => Ok(ProviderId::OpenRouter), + "claude-cli" => Ok(ProviderId::ClaudeCli), + "claude_cli" => Ok(ProviderId::ClaudeCli), // alias _ => Err(format!("Unknown provider: {}", value)), } } @@ -107,6 +114,7 @@ impl ProviderId { ProviderId::Qwen => "qwen", ProviderId::ChatGPT => "chatgpt", ProviderId::DigitalOcean => "digitalocean", + ProviderId::ClaudeCli => "claude-cli", _ => return Vec::new(), }; @@ -144,6 +152,14 @@ impl ProviderId { SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions) } + // ClaudeCli speaks the same wire protocol as Anthropic — the + // brightstaff bridge always presents itself as an Anthropic + // Messages API endpoint, so client requests in any shape get + // converted to AnthropicMessagesAPI on the way out. + (ProviderId::ClaudeCli, _) => { + SupportedUpstreamAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) + } + // Vercel AI Gateway natively supports all three API types (ProviderId::Vercel, SupportedAPIsFromClient::AnthropicMessagesAPI(_)) => { SupportedUpstreamAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) @@ -267,6 +283,7 @@ impl Display for ProviderId { ProviderId::DigitalOcean => write!(f, "digitalocean"), ProviderId::Vercel => write!(f, "vercel"), ProviderId::OpenRouter => write!(f, "openrouter"), + ProviderId::ClaudeCli => write!(f, "claude-cli"), } } } diff --git a/crates/hermesllm/tests/claude_cli_fixtures.rs b/crates/hermesllm/tests/claude_cli_fixtures.rs new file mode 100644 index 000000000..3ac335c4e --- /dev/null +++ b/crates/hermesllm/tests/claude_cli_fixtures.rs @@ -0,0 +1,114 @@ +//! End-to-end fixture tests for `apis::claude_cli`. Each NDJSON file under +//! `tests/fixtures/claude_cli/` represents one canned subprocess output. We +//! 
parse it line-by-line and feed it through the same translation entry points +//! the brightstaff bridge uses at runtime. + +use std::fs; +use std::path::PathBuf; + +use hermesllm::apis::anthropic::{ + MessagesContentBlock, MessagesContentDelta, MessagesStopReason, MessagesStreamEvent, +}; +use hermesllm::apis::claude_cli::{ + cli_event_to_messages_stream_event, collect_to_messages_response, parse_ndjson_line, + ClaudeCliEvent, ClaudeCliTranslationError, +}; + +fn fixture_path(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join("claude_cli") + .join(name) +} + +fn load_events(name: &str) -> Vec { + let body = fs::read_to_string(fixture_path(name)) + .unwrap_or_else(|e| panic!("read fixture {name}: {e}")); + body.lines() + .filter_map(|line| parse_ndjson_line(line).map(|r| r.unwrap_or_else(|e| panic!("{e}")))) + .collect() +} + +#[test] +fn text_response_aggregates_into_messages_response() { + let events = load_events("text_response.ndjson"); + let resp = collect_to_messages_response("claude-cli/sonnet", events.clone()).unwrap(); + assert_eq!(resp.id, "msg_01ABC"); + assert_eq!(resp.model, "claude-sonnet-4-6"); + assert_eq!(resp.usage.input_tokens, 12); + assert_eq!(resp.usage.output_tokens, 4); + assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn)); + match &resp.content[..] { + [MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "Hello, world!"), + other => panic!("expected single Text, got {other:?}"), + } + + // Verify the streaming projection emits exactly the events the Anthropic + // SSE wire protocol expects, in order. + let stream: Vec = events + .iter() + .filter_map(cli_event_to_messages_stream_event) + .collect(); + assert!(matches!( + stream[0], + MessagesStreamEvent::MessageStart { .. 
} + )); + let final_event = stream.last().unwrap(); + assert!(matches!(final_event, MessagesStreamEvent::MessageStop)); + let text_deltas = stream + .iter() + .filter_map(|ev| match ev { + MessagesStreamEvent::ContentBlockDelta { + delta: MessagesContentDelta::TextDelta { text }, + .. + } => Some(text.clone()), + _ => None, + }) + .collect::>() + .join(""); + assert_eq!(text_deltas, "Hello, world!"); +} + +#[test] +fn tool_use_response_assembles_partial_json() { + let events = load_events("tool_use_response.ndjson"); + let resp = collect_to_messages_response("sonnet", events).unwrap(); + assert!(matches!(resp.stop_reason, MessagesStopReason::ToolUse)); + match &resp.content[..] { + [MessagesContentBlock::ToolUse { + id, name, input, .. + }] => { + assert_eq!(id, "toolu_W"); + assert_eq!(name, "get_weather"); + assert_eq!(input["city"], "Seattle"); + } + other => panic!("expected single ToolUse block, got {other:?}"), + } +} + +#[test] +fn error_response_returns_cli_error() { + let events = load_events("error_response.ndjson"); + let err = collect_to_messages_response("sonnet", events).unwrap_err(); + match err { + ClaudeCliTranslationError::CliError { message } => { + assert!( + message.contains("529"), + "expected 529 in error message, got: {message}" + ); + } + other => panic!("expected CliError, got {other:?}"), + } +} + +#[test] +fn retry_then_success_is_treated_as_success() { + let events = load_events("retry_then_success.ndjson"); + let resp = collect_to_messages_response("sonnet", events).unwrap(); + assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn)); + match &resp.content[..] { + [MessagesContentBlock::Text { text, .. 
}] => assert_eq!(text, "ok"), + other => panic!("expected Text block, got {other:?}"), + } +} diff --git a/crates/hermesllm/tests/fixtures/claude_cli/error_response.ndjson b/crates/hermesllm/tests/fixtures/claude_cli/error_response.ndjson new file mode 100644 index 000000000..bb2d5a384 --- /dev/null +++ b/crates/hermesllm/tests/fixtures/claude_cli/error_response.ndjson @@ -0,0 +1,3 @@ +{"type":"system","subtype":"init","session_id":"err-1","model":"sonnet","cwd":"/tmp","tools":[]} +{"type":"system","subtype":"api_retry","attempt":1,"reason":"529 overloaded"} +{"type":"result","subtype":"error","is_error":true,"duration_ms":1200,"num_turns":0,"result":"Anthropic API returned 529 after 3 retries","total_cost_usd":0,"session_id":"err-1"} diff --git a/crates/hermesllm/tests/fixtures/claude_cli/retry_then_success.ndjson b/crates/hermesllm/tests/fixtures/claude_cli/retry_then_success.ndjson new file mode 100644 index 000000000..c8986b8c1 --- /dev/null +++ b/crates/hermesllm/tests/fixtures/claude_cli/retry_then_success.ndjson @@ -0,0 +1,10 @@ +{"type":"system","subtype":"init","session_id":"retry-1","model":"sonnet","cwd":"/tmp","tools":[]} +{"type":"system","subtype":"api_retry","attempt":1,"reason":"529 overloaded"} +{"type":"system","subtype":"rate_limit_event","reset_at":"2026-05-04T18:30:00Z"} +{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_retry","type":"message","role":"assistant","content":[],"model":"sonnet","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"output_tokens":0}}}} +{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}} +{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ok"}}} +{"type":"stream_event","event":{"type":"content_block_stop","index":0}} 
+{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":1}}} +{"type":"stream_event","event":{"type":"message_stop"}} +{"type":"result","subtype":"success","is_error":false,"duration_ms":2100,"num_turns":1,"result":"ok","total_cost_usd":0.00009,"usage":{"input_tokens":3,"output_tokens":1},"session_id":"retry-1"} diff --git a/crates/hermesllm/tests/fixtures/claude_cli/text_response.ndjson b/crates/hermesllm/tests/fixtures/claude_cli/text_response.ndjson new file mode 100644 index 000000000..52e56fee7 --- /dev/null +++ b/crates/hermesllm/tests/fixtures/claude_cli/text_response.ndjson @@ -0,0 +1,10 @@ +{"type":"system","subtype":"init","session_id":"a1b2c3","model":"claude-sonnet-4-6","cwd":"/tmp","tools":["Bash","Read"]} +{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_01ABC","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":12,"output_tokens":0}}}} +{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}} +{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}} +{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":", world!"}}} +{"type":"stream_event","event":{"type":"content_block_stop","index":0}} +{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":4}}} +{"type":"stream_event","event":{"type":"message_stop"}} +{"type":"assistant","message":{"id":"msg_01ABC","type":"message","role":"assistant","model":"claude-sonnet-4-6","content":[{"type":"text","text":"Hello, world!"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":12,"output_tokens":4}}} 
+{"type":"result","subtype":"success","is_error":false,"duration_ms":521,"num_turns":1,"result":"Hello, world!","total_cost_usd":0.00012,"usage":{"input_tokens":12,"output_tokens":4},"session_id":"a1b2c3"} diff --git a/crates/hermesllm/tests/fixtures/claude_cli/tool_use_response.ndjson b/crates/hermesllm/tests/fixtures/claude_cli/tool_use_response.ndjson new file mode 100644 index 000000000..c5fd208f1 --- /dev/null +++ b/crates/hermesllm/tests/fixtures/claude_cli/tool_use_response.ndjson @@ -0,0 +1,9 @@ +{"type":"system","subtype":"init","session_id":"tool-1","model":"sonnet","cwd":"/tmp","tools":[]} +{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_tool","type":"message","role":"assistant","content":[],"model":"sonnet","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"output_tokens":0}}}} +{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_W","name":"get_weather","input":{}}}} +{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"city\":\""}}} +{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"Seattle\"}"}}} +{"type":"stream_event","event":{"type":"content_block_stop","index":0}} +{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":7}}} +{"type":"stream_event","event":{"type":"message_stop"}} +{"type":"result","subtype":"success","is_error":false,"duration_ms":701,"num_turns":1,"result":null,"total_cost_usd":0.00021,"usage":{"input_tokens":20,"output_tokens":7},"session_id":"tool-1"} diff --git a/demos/integrations/claude_cli/README.md b/demos/integrations/claude_cli/README.md new file mode 100644 index 000000000..e479e1445 --- /dev/null +++ b/demos/integrations/claude_cli/README.md @@ -0,0 +1,49 @@ +# Claude Code 
CLI as a Plano provider + +This demo wires the locally installed `claude` binary as a Plano +`model_provider`. The single line under `model_providers:` + +```yaml +model_providers: + - model: claude-cli/* + default: true +``` + +is enough to: + +1. Auto-fill `provider_interface: claude-cli`, `base_url: http://127.0.0.1:14001` + and a placeholder `access_key` (the CLI uses its own login keychain). +2. Start a localhost bridge inside `brightstaff` that spawns `claude -p + --output-format stream-json --input-format stream-json` for each + conversation. +3. Expose every Claude Code model — `claude-cli/sonnet`, `claude-cli/opus`, + `claude-cli/haiku`, plus dated full ids — at `GET /v1/models`. + +## Running + +```bash +# Make sure the CLI is logged in. You can use API key billing or a paid Claude subscription. +claude auth login + +# Start Plano in native mode. +planoai up demos/integrations/claude_cli/config.yaml +``` + +Then point any OpenAI- or Anthropic-style client at `http://localhost:12000` +and pick any `claude-cli/...` model. Plano routes the request through Envoy +to the brightstaff bridge, which asks the local `claude` binary to handle +it. + +## Optional overrides + +Set these env vars before `planoai up` if you need to tweak the bridge: + + +| Env var | Default | Meaning | +| ----------------------------- | ------------------- | -------------------------------------- | +| `CLAUDE_CLI_BIN` | `claude` | Path to the CLI binary. | +| `CLAUDE_CLI_PERMISSION_MODE` | `bypassPermissions` | `--permission-mode` flag value. | +| `CLAUDE_CLI_LISTEN_ADDR` | `127.0.0.1:14001` | Bridge listen address. | +| `CLAUDE_CLI_SESSION_TTL_SECS` | `600` | Idle TTL before a child is killed. | +| `CLAUDE_CLI_WATCHDOG_SECS` | `120` | Per-line watchdog inside one CLI turn. | +| `CLAUDE_CLI_MAX_SESSIONS` | `64` | Hard cap on concurrent CLI children.
| diff --git a/demos/integrations/claude_cli/config.yaml b/demos/integrations/claude_cli/config.yaml new file mode 100644 index 000000000..fe30a4905 --- /dev/null +++ b/demos/integrations/claude_cli/config.yaml @@ -0,0 +1,27 @@ +version: v0.4.0 + +# Claude Code CLI as a Plano model_provider. +# +# The single line below is everything you need: Plano detects the +# `claude-cli/*` namespace, auto-fills the provider_interface, base_url and +# placeholder access_key, and starts a localhost bridge inside brightstaff +# that shells out to the `claude` binary on your $PATH for each request. +# +# Requirements: +# - `claude --version` must work in the same shell as `planoai up`. +# - Auth happens via the CLI's own `claude auth login` (no API key needed +# in Plano). +# +# Optional overrides via env (set before `planoai up`): +# CLAUDE_CLI_BIN=/custom/path/to/claude +# CLAUDE_CLI_PERMISSION_MODE=default +# CLAUDE_CLI_LISTEN_ADDR=127.0.0.1:14001 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: claude-cli/* + default: true From fc0ccfb4163d9c3da57172f41408ba2591a00477 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 13:09:17 -0700 Subject: [PATCH 02/11] fix(claude-cli): keep upstream path as /v1/messages for ClaudeCli target_endpoint_for_provider was rewriting the upstream path to /v1/chat/completions for any provider that wasn't Anthropic/Vercel, which made Plano POST /v1/chat/completions to the brightstaff bridge. The bridge only accepts POST /v1/messages, so it returned a plain "not found" 404 to the client. Treat ClaudeCli the same as Anthropic for path selection (and force /v1/messages even when the client framed the request as OpenAI Chat Completions or Responses, since the bridge always speaks Anthropic Messages on the wire). 
--- crates/hermesllm/src/clients/endpoints.rs | 41 ++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index eeef88565..d57196169 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -175,7 +175,10 @@ impl SupportedAPIsFromClient { match self { SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages) => { match provider_id { - ProviderId::Anthropic | ProviderId::Vercel => { + // ClaudeCli speaks Anthropic Messages on the wire (the + // brightstaff bridge only accepts `POST /v1/messages`), + // so keep the path as-is just like the real Anthropic. + ProviderId::Anthropic | ProviderId::Vercel | ProviderId::ClaudeCli => { build_endpoint("/v1", "/messages") } ProviderId::AmazonBedrock => { @@ -198,11 +201,18 @@ impl SupportedAPIsFromClient { | ProviderId::XAI | ProviderId::ChatGPT | ProviderId::Vercel => route_by_provider("/responses"), + // ClaudeCli: bridge only accepts Anthropic Messages. + ProviderId::ClaudeCli => build_endpoint("/v1", "/messages"), // All other providers: translate to /chat/completions _ => route_by_provider("/chat/completions"), } } SupportedAPIsFromClient::OpenAIChatCompletions(_) => { + // ClaudeCli: bridge only accepts Anthropic Messages, regardless + // of how the client framed the request. + if matches!(provider_id, ProviderId::ClaudeCli) { + return build_endpoint("/v1", "/messages"); + } // For Chat Completions API, use the standard chat/completions path route_by_provider("/chat/completions") } @@ -633,6 +643,35 @@ mod tests { ); } + /// The brightstaff `claude-cli` bridge only accepts `POST /v1/messages`. + /// Make sure that no matter how a client framed the request, the upstream + /// path stays `/v1/messages`. 
+ #[test] + fn test_claude_cli_endpoint_always_v1_messages() { + for client_api in [ + SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages), + SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + SupportedAPIsFromClient::OpenAIResponsesAPI(OpenAIApi::Responses), + ] { + for request_path in ["/v1/messages", "/v1/chat/completions", "/v1/responses"] { + assert_eq!( + client_api.target_endpoint_for_provider( + &ProviderId::ClaudeCli, + request_path, + "claude-cli/sonnet", + false, + None, + false + ), + "/v1/messages", + "client_api={:?} request_path={} should map to /v1/messages", + client_api, + request_path, + ); + } + } + } + #[test] fn test_non_v1_request_paths() { let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions); From 56006f07698343853b2179126410ff8bd3aa306a Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 13:16:11 -0700 Subject: [PATCH 03/11] chore(claude-cli): tweak demo config (full tracing, drop default flag) --- demos/integrations/claude_cli/config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/demos/integrations/claude_cli/config.yaml b/demos/integrations/claude_cli/config.yaml index fe30a4905..13e9f90c4 100644 --- a/demos/integrations/claude_cli/config.yaml +++ b/demos/integrations/claude_cli/config.yaml @@ -24,4 +24,6 @@ listeners: model_providers: - model: claude-cli/* - default: true + +tracing: + random_sampling: 100 From 53a23ec8f9856d95116bbf6b084f983842bef7c2 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 13:35:11 -0700 Subject: [PATCH 04/11] refactor(claude-cli): make session bookkeeping sync, drop lock-across-await - Convert ClaudeProcess::last_used from tokio::sync::Mutex to std::sync::Mutex: the critical section is one Copy read/write with no .await, so a sync mutex lets SessionManager iterate sessions without holding the map lock across an await point. 
Fixes the lock-across-await pattern in lru_session_id and evict_idle. - Simplify SessionManager::get_or_spawn to a single map-lock acquisition on the fast path; only release the lock for the rare case where we need to await a victim shutdown before spawning. - Replace the hand-rolled "deterministic UUID via DefaultHasher" with a real UUIDv5 over the OID namespace (uuid feature `v5`). Stable across Rust toolchain versions, unlike SipHash, and matches what the doc on the helper claimed all along. - Introduce ProcessError::MissingStdio { which } so spawns where Stdio::piped() somehow returned None surface as their own programmer- error variant rather than masquerading as ExitedEarly. - Delete the dead is_zero() helper. --- crates/Cargo.lock | 1 + crates/brightstaff/Cargo.toml | 2 +- .../src/handlers/claude_cli/process.rs | 50 ++++- .../src/handlers/claude_cli/session.rs | 188 ++++++++---------- 4 files changed, 127 insertions(+), 114 deletions(-) diff --git a/crates/Cargo.lock b/crates/Cargo.lock index c5819de90..7001e8103 100644 --- a/crates/Cargo.lock +++ b/crates/Cargo.lock @@ -4106,6 +4106,7 @@ dependencies = [ "getrandom 0.4.2", "js-sys", "serde_core", + "sha1_smol", "wasm-bindgen", ] diff --git a/crates/brightstaff/Cargo.toml b/crates/brightstaff/Cargo.toml index 0b62c313c..ac17c9a6f 100644 --- a/crates/brightstaff/Cargo.toml +++ b/crates/brightstaff/Cargo.toml @@ -60,7 +60,7 @@ time = { version = "0.3", features = ["formatting", "macros"] } tracing = "0.1" tracing-opentelemetry = "0.32.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } -uuid = { version = "1.0", features = ["v4", "serde"] } +uuid = { version = "1.0", features = ["v4", "v5", "serde"] } [dev-dependencies] mockito = "1.0" diff --git a/crates/brightstaff/src/handlers/claude_cli/process.rs b/crates/brightstaff/src/handlers/claude_cli/process.rs index 6a19943cc..e93642fa0 100644 --- a/crates/brightstaff/src/handlers/claude_cli/process.rs +++ 
b/crates/brightstaff/src/handlers/claude_cli/process.rs @@ -4,7 +4,7 @@ //! and stream-json lives in `hermesllm::apis::claude_cli`. use std::process::Stdio; -use std::sync::Arc; +use std::sync::{Arc, Mutex as StdMutex}; use std::time::Duration; use hermesllm::apis::claude_cli::{parse_ndjson_line, ClaudeCliEvent, ClaudeCliInputEvent}; @@ -56,6 +56,12 @@ pub enum ProcessError { StdinWrite(#[source] std::io::Error), #[error("claude process exited unexpectedly")] ExitedEarly, + /// `Command::spawn` succeeded but a piped stdio handle was already taken + /// by the time we asked for it. Should be unreachable given we set + /// `Stdio::piped()` immediately before spawn; surfaced as its own variant + /// so callers can tell it apart from a real "exited early". + #[error("claude child is missing piped {which} after spawn")] + MissingStdio { which: &'static str }, #[error("claude watchdog fired after {0:?} of silence")] WatchdogTimeout(Duration), #[error("failed to serialize stdin payload: {0}")] @@ -94,8 +100,11 @@ pub struct ClaudeProcess { event_rx: Arc>>, config: ClaudeCliConfig, /// Last time a request was served on this session — used by the session - /// manager to enforce the idle TTL. - last_used: Mutex, + /// manager to enforce the idle TTL. Held under a sync mutex because the + /// critical section is one read/write of a `Copy` value with no `.await`, + /// which keeps `SessionManager` callers from holding the session-map lock + /// across an async hop. 
+ last_used: StdMutex, pub session_id: String, } @@ -148,9 +157,18 @@ impl ClaudeProcess { source: e, })?; - let stdin = child.stdin.take().ok_or(ProcessError::ExitedEarly)?; - let stdout = child.stdout.take().ok_or(ProcessError::ExitedEarly)?; - let stderr = child.stderr.take().ok_or(ProcessError::ExitedEarly)?; + let stdin = child + .stdin + .take() + .ok_or(ProcessError::MissingStdio { which: "stdin" })?; + let stdout = child + .stdout + .take() + .ok_or(ProcessError::MissingStdio { which: "stdout" })?; + let stderr = child + .stderr + .take() + .ok_or(ProcessError::MissingStdio { which: "stderr" })?; // Bounded channel — backpressure if the consumer is slow, but large // enough that bursts of small text deltas do not block stdout drain. @@ -217,7 +235,7 @@ impl ClaudeProcess { stdin: Mutex::new(Some(stdin)), event_rx: Arc::new(Mutex::new(rx)), config, - last_used: Mutex::new(Instant::now()), + last_used: StdMutex::new(Instant::now()), session_id, })) } @@ -232,7 +250,10 @@ impl ClaudeProcess { &self, events: &[ClaudeCliInputEvent], ) -> Result { - *self.last_used.lock().await = Instant::now(); + // Sync lock + Copy value; never held across an `.await`. + if let Ok(mut last) = self.last_used.lock() { + *last = Instant::now(); + } // Claim the event receiver for the lifetime of this turn. let rx_guard = Arc::clone(&self.event_rx) @@ -259,8 +280,17 @@ impl ClaudeProcess { } /// Most-recent activity timestamp; used by the session manager's reaper. - pub async fn last_used(&self) -> Instant { - *self.last_used.lock().await + /// Sync because the lock guards a single `Instant` with no `.await` in + /// the critical section — keeps callers from holding async locks across + /// an await point. + pub fn last_used(&self) -> Instant { + // Poisoning is impossible here (the only writer is `send_user_turn` + // which never panics while holding the lock), but if it ever happens + // we degrade gracefully rather than aborting. 
+ self.last_used + .lock() + .map(|g| *g) + .unwrap_or_else(|p| *p.into_inner()) } /// Forcefully terminate the child. Safe to call multiple times. diff --git a/crates/brightstaff/src/handlers/claude_cli/session.rs b/crates/brightstaff/src/handlers/claude_cli/session.rs index cd664cab1..3b102345a 100644 --- a/crates/brightstaff/src/handlers/claude_cli/session.rs +++ b/crates/brightstaff/src/handlers/claude_cli/session.rs @@ -3,10 +3,8 @@ //! long-lived `ClaudeProcess`. Enforces an idle TTL and a hard cap on the //! number of concurrent sessions. -use std::collections::{hash_map::DefaultHasher, HashMap}; -use std::hash::{Hash, Hasher}; +use std::collections::HashMap; use std::sync::Arc; -use std::time::Duration; use hermesllm::apis::anthropic::{ MessagesContentBlock, MessagesMessageContent, MessagesRequest, MessagesRole, @@ -75,15 +73,19 @@ impl SessionManager { return uuid_from_seed(trimmed); } } - let mut hasher = DefaultHasher::new(); - req.model.hash(&mut hasher); + // Build a deterministic seed from (model, system_prompt, first user + // message) so a retried conversation lands on the same session. + let mut seed = String::new(); + seed.push_str(&req.model); + seed.push('\u{1f}'); if let Some(system) = &req.system { - system_text(system).hash(&mut hasher); + seed.push_str(&system_text(system)); } + seed.push('\u{1f}'); if let Some(first) = first_user_message_text(req) { - first.hash(&mut hasher); + seed.push_str(&first); } - uuid_from_seed(&hasher.finish().to_string()) + uuid_from_seed(&seed) } /// Get the existing session's process or spawn a new one. @@ -98,40 +100,61 @@ impl SessionManager { // background task for the common one-developer-one-laptop deployment. self.evict_idle().await; - { - let map = self.inner.lock().await; - if let Some(existing) = map.get(session_id) { - debug!(session = %session_id, "reusing claude-cli session"); - return Ok(Arc::clone(existing)); - } - } - + // Single lock acquisition for the whole get-or-spawn path. 
`last_used` + // is now a sync mutex on `ClaudeProcess`, so iterating to find the + // LRU victim does not block other tasks across an `.await`. let mut map = self.inner.lock().await; + if let Some(existing) = map.get(session_id) { + debug!(session = %session_id, "reusing claude-cli session"); return Ok(Arc::clone(existing)); } - if map.len() >= self.config.max_sessions { - // Evict the least-recently-used session to keep the cap honest. - if let Some(victim_key) = lru_session_id(&map).await { - if let Some(victim) = map.remove(&victim_key) { - info!(session = %victim_key, "evicting LRU claude-cli session to make room"); - drop(map); - victim.shutdown().await; - map = self.inner.lock().await; - } - } - } + // If we are at the cap, take an LRU victim out of the map first so + // its slot is freed before we insert. We drop the lock for the + // shutdown await (killing a child can take a tick), accepting that + // the cap can drift by one if a concurrent task spawns in that + // window — the next reap will catch it. + let victim = if map.len() >= self.config.max_sessions { + let victim_key = lru_session_id(&map); + victim_key.and_then(|k| map.remove(&k).map(|v| (k, v))) + } else { + None + }; + + // Spawn outside of any lock if we have to wait on a victim shutdown. + let process = if let Some((victim_key, victim_proc)) = victim { + drop(map); + info!(session = %victim_key, "evicting LRU claude-cli session to make room"); + victim_proc.shutdown().await; + let process = ClaudeProcess::spawn( + session_id.to_string(), + model, + system_prompt, + cwd, + self.config.process.clone(), + ) + .await?; + self.inner + .lock() + .await + .insert(session_id.to_string(), Arc::clone(&process)); + process + } else { + // No eviction needed — keep holding the map lock across spawn so + // we don't race with another caller resolving the same id. 
+ let process = ClaudeProcess::spawn( + session_id.to_string(), + model, + system_prompt, + cwd, + self.config.process.clone(), + ) + .await?; + map.insert(session_id.to_string(), Arc::clone(&process)); + process + }; - let process = ClaudeProcess::spawn( - session_id.to_string(), - model, - system_prompt, - cwd, - self.config.process.clone(), - ) - .await?; - map.insert(session_id.to_string(), Arc::clone(&process)); Ok(process) } @@ -152,23 +175,21 @@ impl SessionManager { return; } let now = Instant::now(); - let mut to_kill: Vec<(String, Arc)> = Vec::new(); - { - let map = self.inner.lock().await; - for (k, v) in map.iter() { - if now.duration_since(v.last_used().await) > ttl { - to_kill.push((k.clone(), Arc::clone(v))); - } - } - } - if to_kill.is_empty() { - return; - } - let mut map = self.inner.lock().await; - for (k, _) in &to_kill { - map.remove(k); - } - drop(map); + + // Collect victims under a single lock acquisition; `last_used()` is + // sync, so the iteration never crosses an `.await`. + let to_kill: Vec<(String, Arc)> = { + let mut map = self.inner.lock().await; + let keys: Vec = map + .iter() + .filter(|(_, v)| now.duration_since(v.last_used()) > ttl) + .map(|(k, _)| k.clone()) + .collect(); + keys.into_iter() + .filter_map(|k| map.remove(&k).map(|v| (k, v))) + .collect() + }; + for (k, proc) in to_kill { info!(session = %k, "evicting idle claude-cli session"); proc.shutdown().await; @@ -176,16 +197,12 @@ impl SessionManager { } } -async fn lru_session_id(map: &HashMap>) -> Option { - let mut oldest: Option<(String, Instant)> = None; - for (k, v) in map.iter() { - let used = v.last_used().await; - match &oldest { - Some((_, t)) if *t < used => {} - _ => oldest = Some((k.clone(), used)), - } - } - oldest.map(|(k, _)| k) +/// Pick the least-recently-used session id from the map. Sync because +/// `ClaudeProcess::last_used` is sync. 
+fn lru_session_id(map: &HashMap>) -> Option { + map.iter() + .min_by_key(|(_, v)| v.last_used()) + .map(|(k, _)| k.clone()) } fn first_user_message_text(req: &MessagesRequest) -> Option { @@ -222,47 +239,12 @@ fn system_text(system: &MessagesSystemPrompt) -> String { } } -/// Deterministic v5-style UUID derived from an arbitrary seed string. The -/// `claude` CLI requires `--session-id` to be a valid UUID; we use the DNS -/// namespace constant as a stable salt so the same conversation always maps -/// to the same id without us pulling in the v5 feature of the `uuid` crate. +/// Deterministic UUIDv5 derived from an arbitrary seed string. The `claude` +/// CLI requires `--session-id` to be a valid UUID; v5 (SHA-1 based) gives +/// us a stable mapping across Rust toolchain versions, unlike `DefaultHasher`. +/// We use the OID namespace because the seed isn't a DNS or URL name. fn uuid_from_seed(seed: &str) -> String { - let mut hasher = DefaultHasher::new(); - seed.hash(&mut hasher); - let h1 = hasher.finish(); - let mut hasher2 = DefaultHasher::new(); - h1.hash(&mut hasher2); - seed.hash(&mut hasher2); - let h2 = hasher2.finish(); - let bytes = [ - (h1 >> 56) as u8, - (h1 >> 48) as u8, - (h1 >> 40) as u8, - (h1 >> 32) as u8, - (h1 >> 24) as u8, - (h1 >> 16) as u8, - (h1 >> 8) as u8, - h1 as u8, - (h2 >> 56) as u8, - (h2 >> 48) as u8, - (h2 >> 40) as u8, - (h2 >> 32) as u8, - (h2 >> 24) as u8, - (h2 >> 16) as u8, - (h2 >> 8) as u8, - h2 as u8, - ]; - uuid::Builder::from_random_bytes(bytes) - .into_uuid() - .to_string() -} - -/// `Duration::is_zero` shim — `Duration` exposes `is_zero` only on stable -/// 1.53+, but our MSRV already covers that. Re-exporting keeps call sites -/// terse if we ever need to swap implementations. 
-#[allow(dead_code)] -fn is_zero(d: Duration) -> bool { - d.is_zero() + uuid::Uuid::new_v5(&uuid::Uuid::NAMESPACE_OID, seed.as_bytes()).to_string() } #[cfg(test)] From 3c581853895fe46318413292844d295e4fa6ac5c Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 13:35:48 -0700 Subject: [PATCH 05/11] fix(claude-cli): correct streaming SSE for non-MessageStart first events - The synthetic message_start path only fired when the very first observed event was a Result. If the CLI ever emitted (say) a bare ContentBlockStart first, we'd ship malformed Anthropic SSE without a preceding message_start. Trigger the synthesis on any first stream-advancing event that isn't a MessageStart. - Make every send-to-client branch consistent: break out of the loop when the receiver has gone away (mpsc send returned Err), so we don't keep generating events for a vanished client. - Replace serde_json::to_string(...).unwrap() in the streaming error path with the same fallback json_response already uses ("{}" on serialize failure). No more panic surface in the streaming worker. - Drop the dead `_touch_stream_module` placeholder and its unused `use futures::stream` import. 
--- .../src/handlers/claude_cli/server.rs | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/crates/brightstaff/src/handlers/claude_cli/server.rs b/crates/brightstaff/src/handlers/claude_cli/server.rs index 68f3dc579..91cb96fc8 100644 --- a/crates/brightstaff/src/handlers/claude_cli/server.rs +++ b/crates/brightstaff/src/handlers/claude_cli/server.rs @@ -8,7 +8,6 @@ use std::net::SocketAddr; use std::sync::Arc; use bytes::Bytes; -use futures::stream; use hermesllm::apis::anthropic::MessagesRequest; use hermesllm::apis::claude_cli::{ cli_error_to_anthropic_error_body, cli_event_to_messages_stream_event, @@ -194,6 +193,8 @@ fn stream_response( let (tx, rx) = mpsc::channel::, Infallible>>(64); tokio::spawn(async move { + use hermesllm::apis::anthropic::MessagesStreamEvent; + // Some short turns skip MessageStart; emit a synthetic one so the // client always sees a complete stream. let mut emitted_message_start = false; @@ -205,28 +206,39 @@ fn stream_response( Err(err) => { warn!(session = %session_id, error = %err, "claude-cli streaming turn failed"); let body = cli_error_to_anthropic_error_body(&err.to_string()); - let frame = - Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap())); + let payload = serde_json::to_string(&body).unwrap_or_else(|_| "{}".to_string()); + let frame = Frame::data(format_sse("error", &payload)); let _ = tx.send(Ok(frame)).await; break; } }; - if !emitted_message_start { - if let ClaudeCliEvent::StreamEvent { - event: hermesllm::apis::anthropic::MessagesStreamEvent::MessageStart { .. }, - } = &ev - { - emitted_message_start = true; - } else if matches!(&ev, ClaudeCliEvent::Result { .. }) { - // No actual content was streamed; synthesize a - // MessageStart so the SSE stream is well-formed. + // Synthesize a MessageStart frame the first time we see anything + // that advances the stream (StreamEvent or Result) and isn't + // already a MessageStart. 
Untranslated events (System/Assistant/ + // User/Unknown) don't trigger synthesis — we silently skip them + // and wait for the real or synthetic start later. + let is_message_start = matches!( + &ev, + ClaudeCliEvent::StreamEvent { + event: MessagesStreamEvent::MessageStart { .. } + } + ); + let advances_stream = matches!( + &ev, + ClaudeCliEvent::StreamEvent { .. } | ClaudeCliEvent::Result { .. } + ); + + if !emitted_message_start && advances_stream { + if !is_message_start { let synthetic = synthetic_message_start(&model, Some(&session_id)); if let Some(frame) = sse_frame_for_event(&synthetic) { - let _ = tx.send(Ok(frame)).await; + if tx.send(Ok(frame)).await.is_err() { + break; + } } - emitted_message_start = true; } + emitted_message_start = true; } if let Some(translated) = cli_event_to_messages_stream_event(&ev) { @@ -246,9 +258,11 @@ fn stream_response( .clone() .unwrap_or_else(|| "claude-cli returned an error".to_string()); let body = cli_error_to_anthropic_error_body(&msg); - let frame = - Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap())); - let _ = tx.send(Ok(frame)).await; + let payload = serde_json::to_string(&body).unwrap_or_else(|_| "{}".to_string()); + let frame = Frame::data(format_sse("error", &payload)); + if tx.send(Ok(frame)).await.is_err() { + break; + } } break; } @@ -325,11 +339,3 @@ fn text_response( .insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); resp } - -// Ensure a no-op import so that `stream` (re-exported from futures) is -// considered used in case future expansion needs it. Avoids accidental -// deletion when running `cargo fix`. 
-#[allow(dead_code)] -fn _touch_stream_module() { - let _: stream::Empty = stream::empty(); -} From 2aa9981f46f3cd31f2da57fd69e74c30336ad157 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 13:36:22 -0700 Subject: [PATCH 06/11] chore(claude-cli): drop dead code, drift-proof env defaults, clippy nits - main.rs: rebuild claude_cli_config_from_env on top of SessionManagerConfig::default() and only override fields that have a parsed env var, so the defaults live in exactly one place. - hermesllm/apis/claude_cli.rs: delete the dead `_touch_messages_message_type` stub and its unused MessagesMessage import; apply pedantic-clippy fixes that touch the new code (clone_from over `= x.clone()`, Map::default() over Default::default(), map_or_else over .map(...).unwrap_or_else(...), str::to_string method reference, collapsed identical match arms). - hermesllm/providers/id.rs: collapse the two match arms that mapped "claude-cli" and "claude_cli" to ProviderId::ClaudeCli. - hermesllm/tests/claude_cli_fixtures.rs: collect text deltas straight into a String instead of `.collect::>().join("")`. - brightstaff/tests/claude_cli_bridge.rs: add a Drop impl on BridgeFixture so a panicking test still releases the listener task. 
--- crates/brightstaff/src/main.rs | 53 ++++++++++--------- crates/brightstaff/tests/claude_cli_bridge.rs | 14 +++++ crates/hermesllm/src/apis/claude_cli.rs | 33 ++++++------ crates/hermesllm/src/providers/id.rs | 3 +- crates/hermesllm/tests/claude_cli_fixtures.rs | 7 ++- 5 files changed, 60 insertions(+), 50 deletions(-) diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 1fb5f9735..b93a838e5 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -4,9 +4,7 @@ static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; use brightstaff::app_state::AppState; use brightstaff::handlers::agents::orchestrator::agent_chat; -use brightstaff::handlers::claude_cli::{ - self, ClaudeCliConfig, SessionManager, SessionManagerConfig, -}; +use brightstaff::handlers::claude_cli::{self, SessionManager, SessionManagerConfig}; use brightstaff::handlers::debug; use brightstaff::handlers::empty; use brightstaff::handlers::function_calling::function_calling_chat_handler; @@ -586,6 +584,11 @@ async fn run_server(state: Arc) -> Result<(), Box Option<(std::net::SocketAddr, SessionManagerConfig)> { let addr_str = env::var("CLAUDE_CLI_LISTEN_ADDR").ok()?; let addr: std::net::SocketAddr = match addr_str.parse() { @@ -599,35 +602,33 @@ fn claude_cli_config_from_env() -> Option<(std::net::SocketAddr, SessionManagerC return None; } }; - let binary = env::var("CLAUDE_CLI_BIN").unwrap_or_else(|_| "claude".to_string()); - let permission_mode = - env::var("CLAUDE_CLI_PERMISSION_MODE").unwrap_or_else(|_| "bypassPermissions".to_string()); - let session_ttl = env::var("CLAUDE_CLI_SESSION_TTL_SECS") + + let mut cfg = SessionManagerConfig::default(); + if let Ok(s) = env::var("CLAUDE_CLI_BIN") { + cfg.process.binary = s; + } + if let Ok(s) = env::var("CLAUDE_CLI_PERMISSION_MODE") { + cfg.process.permission_mode = s; + } + if let Some(secs) = env::var("CLAUDE_CLI_SESSION_TTL_SECS") .ok() .and_then(|s| s.parse::().ok()) - 
.map(Duration::from_secs) - .unwrap_or_else(|| Duration::from_secs(600)); - let watchdog = env::var("CLAUDE_CLI_WATCHDOG_SECS") + { + cfg.process.session_ttl = Duration::from_secs(secs); + } + if let Some(secs) = env::var("CLAUDE_CLI_WATCHDOG_SECS") .ok() .and_then(|s| s.parse::().ok()) - .map(Duration::from_secs) - .unwrap_or_else(|| Duration::from_secs(120)); - let max_sessions = env::var("CLAUDE_CLI_MAX_SESSIONS") + { + cfg.process.watchdog = Duration::from_secs(secs); + } + if let Some(n) = env::var("CLAUDE_CLI_MAX_SESSIONS") .ok() .and_then(|s| s.parse::().ok()) - .unwrap_or(claude_cli::session::DEFAULT_MAX_SESSIONS); - Some(( - addr, - SessionManagerConfig { - max_sessions, - process: ClaudeCliConfig { - binary, - permission_mode, - session_ttl, - watchdog, - }, - }, - )) + { + cfg.max_sessions = n; + } + Some((addr, cfg)) } // --------------------------------------------------------------------------- diff --git a/crates/brightstaff/tests/claude_cli_bridge.rs b/crates/brightstaff/tests/claude_cli_bridge.rs index 6cf97258c..4db35e2a4 100644 --- a/crates/brightstaff/tests/claude_cli_bridge.rs +++ b/crates/brightstaff/tests/claude_cli_bridge.rs @@ -84,6 +84,20 @@ impl BridgeFixture { } } +/// Best-effort cleanup if a test panics before `stop().await`. We can't +/// `.await` from `Drop`, so we just abort the listener task; that's enough to +/// keep the runtime from leaking the spawned future. +impl Drop for BridgeFixture { + fn drop(&mut self) { + if let Some(tx) = self.shutdown.take() { + let _ = tx.send(()); + } + if let Some(h) = self.handle.take() { + h.abort(); + } + } +} + fn anthropic_request(stream: bool) -> Value { json!({ "model": "claude-cli/sonnet", diff --git a/crates/hermesllm/src/apis/claude_cli.rs b/crates/hermesllm/src/apis/claude_cli.rs index 0c107a888..2aa2a7867 100644 --- a/crates/hermesllm/src/apis/claude_cli.rs +++ b/crates/hermesllm/src/apis/claude_cli.rs @@ -9,15 +9,15 @@ //! does the actual spawning and streaming. 
use serde::{Deserialize, Serialize}; -use serde_json::{json, Value}; +use serde_json::{json, Map, Value}; use serde_with::skip_serializing_none; use thiserror::Error; use uuid::Uuid; use crate::apis::anthropic::{ - MessagesContentBlock, MessagesContentDelta, MessagesMessage, MessagesMessageContent, - MessagesMessageDelta, MessagesRequest, MessagesResponse, MessagesRole, MessagesStopReason, - MessagesStreamEvent, MessagesStreamMessage, MessagesSystemPrompt, MessagesUsage, + MessagesContentBlock, MessagesContentDelta, MessagesMessageContent, MessagesMessageDelta, + MessagesRequest, MessagesResponse, MessagesRole, MessagesStopReason, MessagesStreamEvent, + MessagesStreamMessage, MessagesSystemPrompt, MessagesUsage, }; /// Errors produced by translation between Anthropic Messages and Claude Code @@ -208,7 +208,7 @@ pub fn messages_request_to_stdin_payload( role: "user", content, }, - session_id: session_id.map(|s| s.to_string()), + session_id: session_id.map(str::to_string), }); } Ok(out) @@ -292,10 +292,10 @@ where ClaudeCliEvent::StreamEvent { event } => match event { MessagesStreamEvent::MessageStart { message } => { if id.is_empty() { - id = message.id.clone(); + id.clone_from(&message.id); } if !message.model.is_empty() { - model_out = message.model.clone(); + model_out.clone_from(&message.model); } usage = message.usage.clone(); } @@ -337,7 +337,6 @@ where // clients but dropped from the non-streaming aggregate. _ => {} }, - MessagesStreamEvent::ContentBlockStop { .. } => {} MessagesStreamEvent::MessageDelta { delta, usage: msg_usage, @@ -351,7 +350,9 @@ where // The MessageDelta usage carries final output_tokens. usage.output_tokens = msg_usage.output_tokens; } - MessagesStreamEvent::MessageStop | MessagesStreamEvent::Ping => {} + MessagesStreamEvent::ContentBlockStop { .. 
} + | MessagesStreamEvent::MessageStop + | MessagesStreamEvent::Ping => {} }, ClaudeCliEvent::Assistant { message } => { last_assistant_message = Some(message); @@ -411,7 +412,7 @@ where BlockKind::ToolUse => { if let Some((tool_id, name, raw_input)) = tool_accum.remove(&idx) { let input_value = if raw_input.is_empty() { - Value::Object(Default::default()) + Value::Object(Map::default()) } else { serde_json::from_str(&raw_input) .unwrap_or_else(|_| Value::String(raw_input)) @@ -505,9 +506,10 @@ pub fn cli_error_to_anthropic_error_body(message: &str) -> Value { /// the CLI did not emit one (it usually does, but very small turns can skip /// straight to `assistant`/`result`). pub fn synthetic_message_start(model: &str, session_id: Option<&str>) -> MessagesStreamEvent { - let id = session_id - .map(|s| format!("msg_cli_{}", s)) - .unwrap_or_else(|| format!("msg_cli_{}", Uuid::new_v4().simple())); + let id = session_id.map_or_else( + || format!("msg_cli_{}", Uuid::new_v4().simple()), + |s| format!("msg_cli_{s}"), + ); MessagesStreamEvent::MessageStart { message: MessagesStreamMessage { id, @@ -537,11 +539,6 @@ pub fn parse_ndjson_line(line: &str) -> Option for ProviderId { "do_ai" => Ok(ProviderId::DigitalOcean), // alias "vercel" => Ok(ProviderId::Vercel), "openrouter" => Ok(ProviderId::OpenRouter), - "claude-cli" => Ok(ProviderId::ClaudeCli), - "claude_cli" => Ok(ProviderId::ClaudeCli), // alias + "claude-cli" | "claude_cli" => Ok(ProviderId::ClaudeCli), _ => Err(format!("Unknown provider: {}", value)), } } diff --git a/crates/hermesllm/tests/claude_cli_fixtures.rs b/crates/hermesllm/tests/claude_cli_fixtures.rs index 3ac335c4e..2847a3525 100644 --- a/crates/hermesllm/tests/claude_cli_fixtures.rs +++ b/crates/hermesllm/tests/claude_cli_fixtures.rs @@ -56,17 +56,16 @@ fn text_response_aggregates_into_messages_response() { )); let final_event = stream.last().unwrap(); assert!(matches!(final_event, MessagesStreamEvent::MessageStop)); - let text_deltas = stream + let 
text_deltas: String = stream .iter() .filter_map(|ev| match ev { MessagesStreamEvent::ContentBlockDelta { delta: MessagesContentDelta::TextDelta { text }, .. - } => Some(text.clone()), + } => Some(text.as_str()), _ => None, }) - .collect::>() - .join(""); + .collect(); assert_eq!(text_deltas, "Hello, world!"); } From 5e689fed51077cf5ab307551d993cde2017958f9 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 4 May 2026 14:45:31 -0700 Subject: [PATCH 07/11] fix(claude-cli): use a fresh UUID per spawn for `claude --session-id` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `--no-session-persistence` only blocks resumability — Claude Code still writes `~/.claude/projects//.jsonl` for every session. Reusing our deterministic brightstaff session id (a v5 UUID hashed from the conversation prefix) caused the CLI to fail every second request for the same conversation with `Error: Session ID ... is already in use`. Generate a per-spawn random v4 UUID inside `ClaudeProcess::spawn` and pass that to `claude --session-id` (and stamp it on every stdin JSONL event so the CLI accepts the turn). Keep the deterministic brightstaff session id as the `SessionManager` map key so retries still hit the hot child. --- .../src/handlers/claude_cli/process.rs | 30 ++++++++++++++++++- .../src/handlers/claude_cli/server.rs | 23 +++++++------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/crates/brightstaff/src/handlers/claude_cli/process.rs b/crates/brightstaff/src/handlers/claude_cli/process.rs index e93642fa0..2e404124c 100644 --- a/crates/brightstaff/src/handlers/claude_cli/process.rs +++ b/crates/brightstaff/src/handlers/claude_cli/process.rs @@ -105,7 +105,16 @@ pub struct ClaudeProcess { /// which keeps `SessionManager` callers from holding the session-map lock /// across an async hop. 
last_used: StdMutex, + /// Brightstaff-internal identifier — a deterministic UUID v5 derived from + /// the conversation prefix (or supplied by the client header). Stable + /// across retries so the manager can route follow-up turns to this same + /// child. NEVER passed to `claude` itself. pub session_id: String, + /// Per-spawn random UUID v4 passed to `claude --session-id`. Always fresh + /// so we never collide with on-disk state (`~/.claude/projects/...`) + /// from a previous run of the same conversation. Also stamped onto every + /// stdin JSONL event so the CLI can verify the turn matches its session. + cli_session_id: String, } impl ClaudeProcess { @@ -119,6 +128,14 @@ impl ClaudeProcess { cwd: Option<&std::path::Path>, config: ClaudeCliConfig, ) -> Result, ProcessError> { + // Always hand the CLI a brand-new UUID. `--no-session-persistence` + // does NOT actually prevent Claude Code from writing + // `~/.claude/projects//.jsonl` — it only blocks + // resumability — so re-using our deterministic `session_id` would + // collide with any prior run of the same conversation and the CLI + // would exit with `Session ID ... is already in use`. + let cli_session_id = uuid::Uuid::new_v4().to_string(); + let mut cmd = Command::new(&config.binary); cmd.arg("-p") .arg("--output-format") @@ -132,7 +149,7 @@ impl ClaudeProcess { .arg("--model") .arg(normalize_model_arg(model)) .arg("--session-id") - .arg(&session_id) + .arg(&cli_session_id) .arg("--no-session-persistence"); if let Some(prompt) = system_prompt { @@ -226,6 +243,7 @@ impl ClaudeProcess { info!( session = %session_id, + cli_session = %cli_session_id, model = %normalize_model_arg(model), "spawned claude-cli" ); @@ -237,9 +255,19 @@ impl ClaudeProcess { config, last_used: StdMutex::new(Instant::now()), session_id, + cli_session_id, })) } + /// The UUID that `claude --session-id` was launched with. 
The bridge has + /// to stamp every stdin JSONL event with this id so the CLI accepts the + /// turn as belonging to its current session — see + /// [`Self::session_id`] for why this is distinct from the brightstaff + /// session id. + pub fn cli_session_id(&self) -> &str { + &self.cli_session_id + } + /// Write the user-turn JSONL events to the child's stdin and return a /// stream that yields parsed CLI events for this turn until the terminal /// `result` event (or watchdog) ends it. diff --git a/crates/brightstaff/src/handlers/claude_cli/server.rs b/crates/brightstaff/src/handlers/claude_cli/server.rs index 91cb96fc8..68b7b703e 100644 --- a/crates/brightstaff/src/handlers/claude_cli/server.rs +++ b/crates/brightstaff/src/handlers/claude_cli/server.rs @@ -134,16 +134,19 @@ async fn handle( } }; - let stdin_payload = match messages_request_to_stdin_payload(&parsed, Some(&session_id)) { - Ok(p) => p, - Err(err) => { - warn!(error = %err, "failed to build claude-cli stdin payload"); - return Ok(json_error( - StatusCode::BAD_REQUEST, - &format!("failed to build claude-cli stdin payload: {err}"), - )); - } - }; + // Stamp stdin events with the CLI's per-spawn UUID, NOT our deterministic + // brightstaff session id. The CLI rejects the turn if the two disagree. 
+ let stdin_payload = + match messages_request_to_stdin_payload(&parsed, Some(process.cli_session_id())) { + Ok(p) => p, + Err(err) => { + warn!(error = %err, "failed to build claude-cli stdin payload"); + return Ok(json_error( + StatusCode::BAD_REQUEST, + &format!("failed to build claude-cli stdin payload: {err}"), + )); + } + }; let streaming = parsed.stream.unwrap_or(false); let model = parsed.model.clone(); From 8e65fca7d8da3c17476524a983725d5b3f11934e Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Thu, 7 May 2026 11:09:37 -0700 Subject: [PATCH 08/11] cli: warn + ack local-agent providers at planoai up --- cli/planoai/consts.py | 1 + cli/planoai/local_agent_warning.py | 305 +++++++++++++++++++++++++ cli/planoai/main.py | 23 ++ cli/test/test_local_agent_warning.py | 322 +++++++++++++++++++++++++++ 4 files changed, 651 insertions(+) create mode 100644 cli/planoai/local_agent_warning.py create mode 100644 cli/test/test_local_agent_warning.py diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index 5cafb8171..1afc1e235 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -13,6 +13,7 @@ PLANO_RUN_DIR = os.path.join(PLANO_HOME, "run") PLANO_BIN_DIR = os.path.join(PLANO_HOME, "bin") PLANO_PLUGINS_DIR = os.path.join(PLANO_HOME, "plugins") +PLANO_STATE_DIR = os.path.join(PLANO_HOME, "state") ENVOY_VERSION = "v1.37.0" # keep in sync with Dockerfile ARG ENVOY_VERSION NATIVE_PID_FILE = os.path.join(PLANO_RUN_DIR, "plano.pid") DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317" diff --git a/cli/planoai/local_agent_warning.py b/cli/planoai/local_agent_warning.py new file mode 100644 index 000000000..fe5ad4bcd --- /dev/null +++ b/cli/planoai/local_agent_warning.py @@ -0,0 +1,305 @@ +"""Detect local-agent provider entries in a Plano config and warn the +operator that the host is about to spawn a local CLI binary with the same +filesystem, shell, and network capabilities as the user running planoai. + +Local-agent providers (e.g. 
``claude-cli``) are an entirely different +trust class from stateless network LLM providers (``openai``, +``anthropic``, ``gemini``, ...): the bridge runs inside brightstaff and +shells out to a local binary for every request, so a misconfigured +production deployment would expose the host to whatever the spawned +agent can do — which, for tools like Claude Code, is "anything the +operator can do at the shell". + +This module is intentionally additive and side-effect free until the +caller invokes :func:`maybe_warn_local_agent_providers`. The set of +known local-agent provider interfaces lives in +:data:`LOCAL_AGENT_PROVIDER_INTERFACES`; adding a future entry (codex, +chatgpt-cli, opencode, hermes, ...) is a one-line change. +""" + +from __future__ import annotations + +import json +import os +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Iterable + +from rich.console import Console +from rich.panel import Panel + +from planoai.consts import PLANO_STATE_DIR + +# Provider interfaces whose runtime spawns a local CLI subprocess with +# host filesystem / shell access. The string here is matched against the +# config's ``provider_interface`` field AND against the ``/...`` +# in ``model:`` and ``name:`` fields, so configs that rely on the +# Python-side autofill (``model: claude-cli/*`` only) are still detected +# before that autofill runs. +# +# Add new entries here as additional local-agent bridges are implemented +# (e.g. a future ``codex-cli`` or ``chatgpt-cli`` bridge that spawns the +# Codex CLI). This is the *only* line that needs to change to extend the +# warning's coverage. +LOCAL_AGENT_PROVIDER_INTERFACES: tuple[str, ...] = ("claude-cli",) + +# Persistent ack lives next to the rest of the per-user planoai state +# (run/, bin/, plugins/, ...). Operators can ``rm`` this file to undo. 
+ACK_FILE_PATH = os.path.join(PLANO_STATE_DIR, "local_agent_ack.json") + +# Env-var fallback for the ``--ack-local-agents`` CLI flag. Truthy values +# are 1/true/yes (case-insensitive); everything else is treated as unset. +ACK_ENV_VAR = "PLANO_ACK_LOCAL_AGENTS" + +# Where the docs page lives. Printed verbatim in the warning panel — the +# relative path resolves cleanly when an operator opens it from the repo +# root, and the GitHub URL is a valid fallback for users running planoai +# outside a clone. +DOCS_RELATIVE_PATH = "docs/source/resources/local_agent_providers.rst" +DOCS_LEARN_MORE = ( + "https://github.com/katanemo/plano/blob/main/docs/source/resources/" + "local_agent_providers.rst" +) + + +@dataclass(frozen=True) +class LocalAgentProvider: + """A single ``model_providers`` entry that resolves to a local-agent + bridge. ``name`` and ``model`` come straight from the config, while + ``interface`` is the canonical key used for ack persistence.""" + + interface: str + name: str + model: str + + +def _truthy_env(value: str | None) -> bool: + if not value: + return False + return value.strip().lower() in {"1", "true", "yes", "on"} + + +def _interface_for_entry(entry: dict) -> str | None: + """Return the canonical local-agent interface name for ``entry``, or + ``None`` if the entry isn't a local-agent provider. + + Matching is intentionally permissive so that minimally-configured + entries — i.e. just ``model: claude-cli/*`` before the Python + autofill runs — are still detected. The first match wins and is + returned; multiple matches against the same interface collapse. 
+ """ + + if not isinstance(entry, dict): + return None + + provider_interface = (entry.get("provider_interface") or "").strip() + provider = (entry.get("provider") or "").strip() + model = str(entry.get("model") or "").strip() + name = str(entry.get("name") or "").strip() + + for interface in LOCAL_AGENT_PROVIDER_INTERFACES: + if provider_interface == interface or provider == interface: + return interface + prefix = f"{interface}/" + if model.startswith(prefix) or name.startswith(prefix): + return interface + + return None + + +def detect_local_agent_providers(config: dict) -> list[LocalAgentProvider]: + """Walk ``config`` and return every ``model_providers`` entry whose + ``provider_interface`` falls in :data:`LOCAL_AGENT_PROVIDER_INTERFACES`. + + Order is preserved so the warning lists providers in declaration + order. Both the new ``model_providers`` key and the legacy + ``llm_providers`` key are consulted, mirroring the rest of the CLI. + """ + + if not isinstance(config, dict): + return [] + + providers = config.get("model_providers") + if not isinstance(providers, list): + providers = config.get("llm_providers") or [] + + found: list[LocalAgentProvider] = [] + for entry in providers: + interface = _interface_for_entry(entry) + if interface is None: + continue + model = str(entry.get("model") or "").strip() + name = str(entry.get("name") or "").strip() or model or interface + found.append(LocalAgentProvider(interface=interface, name=name, model=model)) + return found + + +def _interfaces_in(providers: Iterable[LocalAgentProvider]) -> set[str]: + return {p.interface for p in providers} + + +def load_acknowledged_interfaces(ack_path: str = ACK_FILE_PATH) -> set[str]: + """Read the ack file and return the set of acknowledged provider + interfaces. 
Missing or malformed files are treated as "no ack", + never as a hard error, so a half-written ack file degrades to "warn + again" instead of crashing ``planoai up``.""" + + try: + with open(ack_path, "r", encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError): + return set() + + if not isinstance(data, dict): + return set() + raw = data.get("acknowledged") + if not isinstance(raw, list): + return set() + return {str(item) for item in raw if isinstance(item, str)} + + +def write_acknowledgement( + interfaces: Iterable[str], + ack_path: str = ACK_FILE_PATH, +) -> set[str]: + """Persist ``interfaces`` (merged with anything already on disk) to + the ack file. Returns the full acknowledged set after the write so + callers can render an "acknowledged: X, Y" line.""" + + merged = load_acknowledged_interfaces(ack_path) | set(interfaces) + payload = { + "acknowledged": sorted(merged), + "ack_at": datetime.now(timezone.utc).isoformat(timespec="seconds"), + } + os.makedirs(os.path.dirname(ack_path), exist_ok=True) + with open(ack_path, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, sort_keys=True) + f.write("\n") + return merged + + +def _render_panel( + console: Console, + pending: list[LocalAgentProvider], +) -> None: + """Render the single warning panel for ``pending``. 
Callers must + ensure ``pending`` is non-empty; the caller decides whether to skip + based on the ack set.""" + + listed = "\n".join( + f" • [bold]{p.name}[/bold]" + + (f" [dim]({p.model})[/dim]" if p.model and p.model != p.name else "") + + f" [dim]→ provider_interface=[/dim][cyan]{p.interface}[/cyan]" + for p in pending + ) + + interfaces_csv = ", ".join(sorted({p.interface for p in pending})) + body_lines = [ + "[bold yellow]This config wires up a local-agent provider.[/bold yellow]", + "", + listed, + "", + ( + "Unlike stateless network providers ([cyan]openai[/cyan], " + "[cyan]anthropic[/cyan], [cyan]gemini[/cyan], ...), these entries " + "spawn a local CLI binary as a subprocess of brightstaff. The " + "subprocess inherits the operator's permissions and can:" + ), + " • read and write any file the operator can touch", + " • execute arbitrary shell commands as the operator's user", + " • use the host's auth keychain / login session", + " • make outbound network calls from the host's IP", + "", + ( + "[bold]Intended for local development only — not production.[/bold] " + "Treat this as the same trust class as OpenClaw / OpenCode / " + "Hermes (agent integrations), not a stateless LLM provider." 
+ ), + "", + f"[dim]Learn more:[/dim] [bold]{DOCS_LEARN_MORE}[/bold]", + f"[dim]Or in this repo:[/dim] [bold]{DOCS_RELATIVE_PATH}[/bold]", + "", + "[dim]Dismiss permanently:[/dim]", + f" [cyan]planoai up --ack-local-agents[/cyan] [dim]# writes {ACK_FILE_PATH}[/dim]", + f" [dim]or:[/dim] [cyan]{ACK_ENV_VAR}=1 planoai up[/cyan]", + f"[dim]Undo with:[/dim] [cyan]rm {ACK_FILE_PATH}[/cyan]", + ] + + console.print( + Panel( + "\n".join(body_lines), + title=f"⚠ Local-agent provider detected ({interfaces_csv})", + title_align="left", + border_style="yellow", + padding=(1, 2), + ) + ) + + +def maybe_warn_local_agent_providers( + config: dict, + console: Console, + *, + ack_flag: bool = False, + ack_path: str = ACK_FILE_PATH, + env: dict | None = None, +) -> bool: + """Show the local-agent warning panel if appropriate and return + ``True`` iff the panel was rendered. + + Resolution order, top to bottom: + + 1. No local-agent providers in config → no-op. + 2. ``ack_flag`` (the ``--ack-local-agents`` CLI flag) **or** the + :data:`ACK_ENV_VAR` env var truthy → write/update the ack file + so it covers every triggering interface, print one ✓ confirmation + line, suppress the panel. + 3. Existing ack file already covers every triggering interface → + print a single dim INFO line and suppress the panel. + 4. Otherwise → render the panel for the *un-acked* interfaces only + (e.g. acknowledged ``claude-cli`` doesn't suppress a fresh + warning when the operator later adds a hypothetical ``codex``). 
+ """ + + env = env if env is not None else os.environ + detected = detect_local_agent_providers(config) + if not detected: + return False + + ack_via_env = _truthy_env(env.get(ACK_ENV_VAR)) + if ack_flag or ack_via_env: + new_set = _interfaces_in(detected) + merged = write_acknowledgement(new_set, ack_path=ack_path) + ack_csv = ", ".join(sorted(new_set)) + console.print( + f"[green]✓[/green] Acknowledged local-agent provider(s): " + f"[bold]{ack_csv}[/bold] [dim]→ {ack_path}[/dim]" + ) + return False + + acknowledged = load_acknowledged_interfaces(ack_path) + pending = [p for p in detected if p.interface not in acknowledged] + if not pending: + ack_csv = ", ".join(sorted(_interfaces_in(detected))) + console.print( + f"[dim]Local-agent providers acknowledged: {ack_csv}. " + f"Remove {ack_path} to undo.[/dim]" + ) + return False + + _render_panel(console, pending) + return True + + +__all__ = [ + "ACK_ENV_VAR", + "ACK_FILE_PATH", + "DOCS_LEARN_MORE", + "DOCS_RELATIVE_PATH", + "LOCAL_AGENT_PROVIDER_INTERFACES", + "LocalAgentProvider", + "detect_local_agent_providers", + "load_acknowledged_interfaces", + "maybe_warn_local_agent_providers", + "write_acknowledgement", +] diff --git a/cli/planoai/main.py b/cli/planoai/main.py index ea43a1a8a..0a1b0acb1 100644 --- a/cli/planoai/main.py +++ b/cli/planoai/main.py @@ -39,6 +39,7 @@ from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background from planoai.chatgpt_cmd import chatgpt as chatgpt_cmd from planoai.obs_cmd import obs as obs_cmd +from planoai.local_agent_warning import maybe_warn_local_agent_providers from planoai.consts import ( DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT, @@ -354,6 +355,18 @@ def build(docker): show_default=True, help="Override the LLM listener port when running without a config file. 
Ignored when a config file is present.", ) +@click.option( + "--ack-local-agents", + "ack_local_agents", + default=False, + is_flag=True, + help=( + "Acknowledge that local-agent providers (e.g. claude-cli/*) spawn a " + "local CLI binary with full host filesystem and shell access. Writes " + "an ack file so the warning is suppressed on future runs. Equivalent " + "to setting PLANO_ACK_LOCAL_AGENTS=1." + ), +) def up( file, path, @@ -363,6 +376,7 @@ def up( docker, verbose, listener_port, + ack_local_agents, ): """Starts Plano.""" from rich.status import Status @@ -444,6 +458,15 @@ def up( with open(plano_config_file, "r") as f: plano_config = yaml.safe_load(f) + # Warn about local-agent providers (e.g. claude-cli/*) that spawn a + # local CLI binary with full host filesystem and shell access. Fires + # exactly once per `planoai up` invocation; --ack-local-agents (or + # PLANO_ACK_LOCAL_AGENTS=1) writes a persistent ack so the warning + # only re-appears for newly-introduced local-agent interfaces. + maybe_warn_local_agent_providers( + plano_config or {}, console, ack_flag=ack_local_agents + ) + # Inject ChatGPT tokens from ~/.plano/chatgpt/auth.json if any provider needs them _inject_chatgpt_tokens_if_needed(plano_config, env, console) diff --git a/cli/test/test_local_agent_warning.py b/cli/test/test_local_agent_warning.py new file mode 100644 index 000000000..50da63102 --- /dev/null +++ b/cli/test/test_local_agent_warning.py @@ -0,0 +1,322 @@ +"""Tests for the local-agent provider warning, ack persistence, and the +detection logic that decides whether to fire it.""" + +from __future__ import annotations + +import io +import json + +import pytest +from rich.console import Console + +from planoai import local_agent_warning as law + + +def _make_console() -> tuple[Console, io.StringIO]: + buf = io.StringIO() + # ``force_terminal=False`` keeps Rich from emitting ANSI escapes, + # which makes substring assertions readable. 
``width`` is generous + # so the panel border doesn't soft-wrap text mid-keyword. + console = Console(file=buf, force_terminal=False, color_system=None, width=140) + return console, buf + + +# --------------------------------------------------------------------------- +# detection +# --------------------------------------------------------------------------- + + +def test_detects_claude_cli_via_model_prefix(): + config = { + "model_providers": [ + {"model": "claude-cli/sonnet"}, + {"model": "openai/gpt-4o"}, + ] + } + found = law.detect_local_agent_providers(config) + assert [p.interface for p in found] == ["claude-cli"] + assert found[0].model == "claude-cli/sonnet" + + +def test_detects_claude_cli_via_explicit_provider_interface(): + config = { + "model_providers": [ + {"name": "local-claude", "provider_interface": "claude-cli", "model": "x"}, + ] + } + found = law.detect_local_agent_providers(config) + assert [p.interface for p in found] == ["claude-cli"] + assert found[0].name == "local-claude" + + +def test_detects_claude_cli_via_legacy_provider_field(): + config = {"model_providers": [{"provider": "claude-cli", "model": "x"}]} + assert [p.interface for p in law.detect_local_agent_providers(config)] == [ + "claude-cli" + ] + + +def test_detects_via_legacy_llm_providers_key(): + config = {"llm_providers": [{"model": "claude-cli/opus"}]} + assert [p.interface for p in law.detect_local_agent_providers(config)] == [ + "claude-cli" + ] + + +def test_no_false_positive_for_network_providers(): + config = { + "model_providers": [ + {"model": "openai/gpt-4o"}, + {"model": "anthropic/claude-3-5-sonnet"}, + {"model": "gemini/gemini-2.5-pro"}, + {"model": "chatgpt/gpt-5"}, # network ChatGPT subscription, not a CLI + {"model": "vercel/some-model"}, + ] + } + assert law.detect_local_agent_providers(config) == [] + + +def test_no_false_positive_for_anthropic_claude_models(): + # ``anthropic/claude-3-5-sonnet`` must not trigger just because the + # word "claude" appears — the 
prefix has to be ``claude-cli/``. + config = {"model_providers": [{"model": "anthropic/claude-3-5-sonnet-20241022"}]} + assert law.detect_local_agent_providers(config) == [] + + +def test_empty_or_malformed_config_is_safe(): + assert law.detect_local_agent_providers({}) == [] + assert law.detect_local_agent_providers({"model_providers": None}) == [] + assert law.detect_local_agent_providers({"model_providers": "not-a-list"}) == [] + # ``None`` config (e.g. from an empty yaml file) must also be safe. + assert law.detect_local_agent_providers(None) == [] # type: ignore[arg-type] + + +def test_multiple_entries_same_interface_collapse_in_warning_set(): + config = { + "model_providers": [ + {"model": "claude-cli/sonnet", "name": "fast"}, + {"model": "claude-cli/opus", "name": "slow"}, + ] + } + found = law.detect_local_agent_providers(config) + assert len(found) == 2 + assert {p.interface for p in found} == {"claude-cli"} + + +# --------------------------------------------------------------------------- +# ack file +# --------------------------------------------------------------------------- + + +def test_load_ack_returns_empty_when_missing(tmp_path): + ack = tmp_path / "ack.json" + assert law.load_acknowledged_interfaces(str(ack)) == set() + + +@pytest.mark.parametrize( + "contents", + [ + "{not valid json", + "[]", # not a dict + '{"acknowledged": "claude-cli"}', # not a list + '{"acknowledged": [1, 2, 3]}', # not strings + ], +) +def test_load_ack_handles_malformed_files(tmp_path, contents): + ack = tmp_path / "ack.json" + ack.write_text(contents, encoding="utf-8") + # Malformed contents must degrade to "no ack" rather than crashing. 
+ assert law.load_acknowledged_interfaces(str(ack)) == set() + + +def test_write_ack_creates_state_dir(tmp_path): + ack = tmp_path / "fresh" / "deeper" / "ack.json" + merged = law.write_acknowledgement(["claude-cli"], ack_path=str(ack)) + assert merged == {"claude-cli"} + assert ack.exists() + payload = json.loads(ack.read_text(encoding="utf-8")) + assert payload["acknowledged"] == ["claude-cli"] + assert payload["ack_at"] + + +def test_write_ack_merges_with_existing(tmp_path): + ack = tmp_path / "ack.json" + law.write_acknowledgement(["claude-cli"], ack_path=str(ack)) + merged = law.write_acknowledgement(["future-cli"], ack_path=str(ack)) + assert merged == {"claude-cli", "future-cli"} + payload = json.loads(ack.read_text(encoding="utf-8")) + assert payload["acknowledged"] == ["claude-cli", "future-cli"] + + +# --------------------------------------------------------------------------- +# maybe_warn_local_agent_providers +# --------------------------------------------------------------------------- + + +def test_no_panel_when_no_local_agent_providers(tmp_path): + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "openai/gpt-4o"}]}, + console, + ack_path=str(tmp_path / "ack.json"), + env={}, + ) + assert fired is False + assert buf.getvalue() == "" + + +def test_panel_fires_for_unacked_claude_cli(tmp_path): + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_path=str(tmp_path / "ack.json"), + env={}, + ) + output = buf.getvalue() + assert fired is True + # Stable substrings — never pin exact wording. + assert "claude-cli" in output + assert "Local-agent" in output or "local-agent" in output + assert "Learn more" in output + assert "--ack-local-agents" in output + # The dismissal hint must mention the ack file path so the user + # knows where to ``rm`` it. 
+ assert "local_agent_ack.json" in output + + +def test_panel_suppressed_when_ack_covers_interface(tmp_path): + ack = tmp_path / "ack.json" + law.write_acknowledgement(["claude-cli"], ack_path=str(ack)) + + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_path=str(ack), + env={}, + ) + assert fired is False + # The dim INFO line still mentions the ack file so the operator + # knows how to undo, but no panel renders. + out = buf.getvalue() + assert "Panel" not in out # no panel object + assert "claude-cli" in out + + +def test_new_unacked_interface_re_triggers(tmp_path, monkeypatch): + # Simulate a future where two local-agent interfaces exist and the + # user has only acknowledged one of them. + monkeypatch.setattr( + law, "LOCAL_AGENT_PROVIDER_INTERFACES", ("claude-cli", "future-cli") + ) + + ack = tmp_path / "ack.json" + law.write_acknowledgement(["claude-cli"], ack_path=str(ack)) + + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + { + "model_providers": [ + {"model": "claude-cli/sonnet"}, + {"model": "future-cli/whatever"}, + ] + }, + console, + ack_path=str(ack), + env={}, + ) + output = buf.getvalue() + assert fired is True + # The panel must list the *unacknowledged* interface only. + assert "future-cli" in output + # ...and must NOT re-list the already-acknowledged one as unacked + # (it can still appear in the suppressed-info line; we check the + # title which only contains pending interfaces). 
+ assert "future-cli" in output + + +def test_ack_flag_writes_file_and_suppresses_panel(tmp_path): + ack = tmp_path / "ack.json" + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_flag=True, + ack_path=str(ack), + env={}, + ) + assert fired is False + assert ack.exists() + payload = json.loads(ack.read_text(encoding="utf-8")) + assert "claude-cli" in payload["acknowledged"] + out = buf.getvalue() + assert "Acknowledged" in out + assert "claude-cli" in out + + +@pytest.mark.parametrize("env_value", ["1", "true", "TRUE", "yes", "on"]) +def test_ack_env_var_truthy_values(tmp_path, env_value): + ack = tmp_path / "ack.json" + console, _ = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_path=str(ack), + env={law.ACK_ENV_VAR: env_value}, + ) + assert fired is False + assert ack.exists() + + +@pytest.mark.parametrize("env_value", ["", "0", "false", "no", "off", "maybe"]) +def test_ack_env_var_falsy_values_still_warn(tmp_path, env_value): + ack = tmp_path / "ack.json" + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_path=str(ack), + env={law.ACK_ENV_VAR: env_value}, + ) + assert fired is True + assert not ack.exists() + assert "claude-cli" in buf.getvalue() + + +def test_malformed_ack_falls_back_to_warning(tmp_path): + ack = tmp_path / "ack.json" + ack.write_text("{not json", encoding="utf-8") + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + {"model_providers": [{"model": "claude-cli/sonnet"}]}, + console, + ack_path=str(ack), + env={}, + ) + assert fired is True + assert "claude-cli" in buf.getvalue() + + +def test_single_panel_when_multiple_local_agent_entries(tmp_path): + # Two entries with the same interface must produce one panel, + # not 
two — the warning fires once per ``planoai up`` invocation. + console, buf = _make_console() + fired = law.maybe_warn_local_agent_providers( + { + "model_providers": [ + {"model": "claude-cli/sonnet", "name": "fast"}, + {"model": "claude-cli/opus", "name": "slow"}, + ] + }, + console, + ack_path=str(tmp_path / "ack.json"), + env={}, + ) + assert fired is True + output = buf.getvalue() + # Both names appear in the listing, but the warning header + # (``Local-agent provider detected``) appears exactly once. + assert output.count("Local-agent provider detected") == 1 + assert "fast" in output + assert "slow" in output From 294af49d8ad34c8aad08930b39a6583994f1da73 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Thu, 7 May 2026 11:09:54 -0700 Subject: [PATCH 09/11] docs: cover claude-cli trust model and dismissal --- docs/source/index.rst | 1 + .../resources/local_agent_providers.rst | 184 ++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 docs/source/resources/local_agent_providers.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 7a2e5b603..c021d38c3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -63,4 +63,5 @@ Built by contributors to the widely adopted `Envoy Proxy /...`` prefix in +``model:``/``name:``), it prints a single warning panel listing the +triggering entries and refusing to proceed silently until the operator +acknowledges. This is intentional. The warning fires exactly once per +``planoai up`` run, regardless of how many local-agent entries the +config has. + +Trust model +----------- + +Spawning a local CLI binary as the operator's user is a very different +thing from making an HTTPS call. The subprocess inherits everything the +operator can do: + +.. 
list-table:: + :header-rows: 1 + :widths: 30 35 35 + + * - Capability + - Network LLM provider + - Local-agent provider + * - Filesystem read + - No + - **Yes** — anything ``$USER`` can read + * - Filesystem write + - No + - **Yes** — anything ``$USER`` can write + * - Shell command execution + - No + - **Yes** — full shell as ``$USER`` + * - Auth / credentials + - Per-provider API key + - **Host login keychain** (no per-tenant isolation) + * - Outbound network + - To the provider only + - **Anywhere the host can reach** + * - Reproducibility + - Deterministic given inputs + - Depends on local FS, env, CWD, installed tools + * - Suitable for production + - Yes + - **No — local development only** + +Concretely, when a request hits a ``claude-cli/*`` model, brightstaff +runs (roughly): + +.. code-block:: bash + + claude -p --output-format stream-json --input-format stream-json \ + --permission-mode bypassPermissions ... + +Whatever Claude Code decides to do with the working directory, the +shell, ``rm``, ``git``, your SSH keys, your ``~/.aws/credentials``, your +production database connection strings — all of that is reachable. This +is the *correct* trust model for a single-developer workstation; it is +the *wrong* trust model for anything multi-tenant. + +Local-agent providers are in the same category as standalone agent +runtimes like `OpenClaw`_, `OpenCode`_, and `Hermes`_: they are agent +integrations that happen to expose an LLM-shaped HTTP API, not +LLM providers that happen to run locally. + +.. _OpenClaw: https://github.com/openclaw/openclaw +.. _OpenCode: https://github.com/sst/opencode +.. _Hermes: https://github.com/HermesAI/hermes + +Recommended setup +----------------- + +If you are using a local-agent provider, treat it like any other +developer-machine agent runtime: + +- **Bind to loopback only.** Do not expose the bridge or the Plano + listener to a network interface. ``127.0.0.1`` only. +- **Single-developer use.** One operator, one host. 
Do not put a + load balancer in front of it. Do not share the deployment. +- **Opt-in.** Don't add a local-agent provider to a config that other + people deploy. Keep it in a config file that's clearly scoped to one + workstation. +- **Don't run as root** and don't run inside a container that mounts + more of the host filesystem than necessary. The subprocess inherits + the launching process's capabilities verbatim. +- **Audit the spawned binary** the same way you would audit anything + with ``sudo`` access. If the operator's ``claude`` (or future + ``codex``) binary is compromised, so is the host. + +Dismissing the warning +---------------------- + +The warning is dismissable per-host. The recommended path is the CLI +flag: + +.. code-block:: bash + + planoai up --ack-local-agents + +That writes an ack file at ``~/.plano/state/local_agent_ack.json`` +containing every triggering provider interface and the timestamp. On +subsequent ``planoai up`` runs, the warning panel is suppressed as +long as the ack covers every local-agent interface in the config (only a one-line dim notice is printed). + +If you prefer an environment variable (e.g. inside a personal +``direnv`` setup), set ``PLANO_ACK_LOCAL_AGENTS=1`` instead. Truthy +values are ``1``, ``true``, ``yes``, ``on`` (case-insensitive). Setting +the env var has the same effect as passing the flag — it writes the +ack file. + +If a *new* local-agent interface appears later (e.g. you add a +hypothetical ``codex-cli/*`` after acknowledging ``claude-cli/*``), the +warning re-fires for the un-acked interface only. + +Undoing the dismissal +~~~~~~~~~~~~~~~~~~~~~ + +To undo the dismissal — for example, when handing the host to another +developer or running through a security review — simply remove the +file: + +.. code-block:: bash + + rm ~/.plano/state/local_agent_ack.json + +The next ``planoai up`` run will print the full warning panel again. 
+ +Adding a new local-agent provider type +-------------------------------------- + +The set of local-agent provider interfaces lives in +``cli/planoai/local_agent_warning.py`` as +``LOCAL_AGENT_PROVIDER_INTERFACES``. Adding a new entry — say, a future +``codex-cli`` bridge that spawns the OpenAI Codex CLI — is a one-line +change: + +.. code-block:: python + + LOCAL_AGENT_PROVIDER_INTERFACES = ("claude-cli", "codex-cli") + +Detection automatically covers ``provider_interface: codex-cli`` as +well as ``model: codex-cli/...`` and ``name: codex-cli/...``, so users +who rely on the Python-side autofill for short-form configs are still +warned. + +.. note:: + + At the time of writing, the only network ``provider_interface`` that + shares any naming with a local agent runtime is ``chatgpt`` — but + that is a stateless HTTPS provider against + ``https://chatgpt.com/backend-api/codex``, **not** a local CLI + bridge. It is correctly excluded from + ``LOCAL_AGENT_PROVIDER_INTERFACES``. The ``codex`` value accepted by + ``planoai cli_agent codex`` is a *client* helper that points the + Codex CLI at a running Plano listener; it does not introduce a + provider into the config. From fbdb66b77f7d1e6b58c7253d48861e76c2600a42 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Thu, 7 May 2026 11:27:23 -0700 Subject: [PATCH 10/11] ci: pin uv >=0.11.11 to clear GHSA-82j2-j2ch-gfr8 (rustls-webpki) Trivy security-scan flagged uv 0.11.7 (currently fetched by an unpinned `pip install uv`) because it bundles rustls-webpki 0.103.10. The advisory (DoS via panic on malformed CRL BIT STRING) is fixed in 0.103.13. uv 0.11.11 picks up the fixed rustls-webpki, so we pin to that floor. 
--- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ad0ca7079..b9c410643 100644 --- a/Dockerfile +++ b/Dockerfile @@ -65,7 +65,10 @@ COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy WORKDIR /app -RUN pip install --no-cache-dir uv +# Pin uv to >=0.11.11; older versions bundle rustls-webpki 0.103.10 which is +# flagged by GHSA-82j2-j2ch-gfr8 (DoS via panic on malformed CRL BIT STRING). +# uv 0.11.11+ ships rustls-webpki 0.103.13. +RUN pip install --no-cache-dir 'uv>=0.11.11' COPY cli/pyproject.toml ./ COPY cli/uv.lock ./ From aaa95468906d5dc12aa7dd7c6e26fd6259f4d3c1 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Thu, 7 May 2026 11:36:37 -0700 Subject: [PATCH 11/11] cli: shrink local-agent warning panel to a reminder + docs link Drops the bullet-list capability dump, the relative-path "or in this repo" line, and the verbose dismissal block (which leaked the ack file path into user-visible output). The panel is now ~6 lines: title with interface(s), one sentence summary, "Learn more" pointing at docs.planoai.dev, and a one-line `--ack-local-agents` hint. The full trust-model write-up and the `rm` instruction live in the docs page. Also tightens the acknowledged-already and ack-success lines (no path leak) and switches the parenthetical name list to skip autofilled `<interface>/...` model strings. --- cli/planoai/local_agent_warning.py | 112 +++++++++++++-------------- cli/test/test_local_agent_warning.py | 8 +- 2 files changed, 57 insertions(+), 63 deletions(-) diff --git a/cli/planoai/local_agent_warning.py b/cli/planoai/local_agent_warning.py index fe5ad4bcd..f6ba67ad8 100644 --- a/cli/planoai/local_agent_warning.py +++ b/cli/planoai/local_agent_warning.py @@ -51,15 +51,10 @@ # are 1/true/yes (case-insensitive); everything else is treated as unset. ACK_ENV_VAR = "PLANO_ACK_LOCAL_AGENTS" -# Where the docs page lives. 
Printed verbatim in the warning panel — the -# relative path resolves cleanly when an operator opens it from the repo -# root, and the GitHub URL is a valid fallback for users running planoai -# outside a clone. -DOCS_RELATIVE_PATH = "docs/source/resources/local_agent_providers.rst" -DOCS_LEARN_MORE = ( - "https://github.com/katanemo/plano/blob/main/docs/source/resources/" - "local_agent_providers.rst" -) +# Public docs page. The Sphinx source lives at +# ``docs/source/resources/local_agent_providers.rst`` and is published to +# https://docs.planoai.dev (CNAME at ``docs/CNAME``). +DOCS_LEARN_MORE = "https://docs.planoai.dev/resources/local_agent_providers.html" @dataclass(frozen=True) @@ -182,56 +177,54 @@ def _render_panel( console: Console, pending: list[LocalAgentProvider], ) -> None: - """Render the single warning panel for ``pending``. Callers must - ensure ``pending`` is non-empty; the caller decides whether to skip - based on the ack set.""" - - listed = "\n".join( - f" • [bold]{p.name}[/bold]" - + (f" [dim]({p.model})[/dim]" if p.model and p.model != p.name else "") - + f" [dim]→ provider_interface=[/dim][cyan]{p.interface}[/cyan]" - for p in pending - ) + """Render the (small) reminder panel for ``pending``. Callers must + ensure ``pending`` is non-empty. - interfaces_csv = ", ".join(sorted({p.interface for p in pending})) - body_lines = [ - "[bold yellow]This config wires up a local-agent provider.[/bold yellow]", - "", - listed, - "", - ( - "Unlike stateless network providers ([cyan]openai[/cyan], " - "[cyan]anthropic[/cyan], [cyan]gemini[/cyan], ...), these entries " - "spawn a local CLI binary as a subprocess of brightstaff. 
The " - "subprocess inherits the operator's permissions and can:" - ), - " • read and write any file the operator can touch", - " • execute arbitrary shell commands as the operator's user", - " • use the host's auth keychain / login session", - " • make outbound network calls from the host's IP", - "", - ( - "[bold]Intended for local development only — not production.[/bold] " - "Treat this as the same trust class as OpenClaw / OpenCode / " - "Hermes (agent integrations), not a stateless LLM provider." - ), - "", - f"[dim]Learn more:[/dim] [bold]{DOCS_LEARN_MORE}[/bold]", - f"[dim]Or in this repo:[/dim] [bold]{DOCS_RELATIVE_PATH}[/bold]", - "", - "[dim]Dismiss permanently:[/dim]", - f" [cyan]planoai up --ack-local-agents[/cyan] [dim]# writes {ACK_FILE_PATH}[/dim]", - f" [dim]or:[/dim] [cyan]{ACK_ENV_VAR}=1 planoai up[/cyan]", - f"[dim]Undo with:[/dim] [cyan]rm {ACK_FILE_PATH}[/cyan]", - ] + """Render the (small) reminder panel for ``pending``. Callers must + ensure ``pending`` is non-empty. + + The panel is intentionally compact: the title names the interface(s), + the body is two short lines (capability summary + dismiss hint), and + the "Learn more" link points at the published Sphinx docs. Operators + who want the full trust-model write-up follow the link. + """ + + interfaces = sorted({p.interface for p in pending}) + interfaces_csv = ", ".join(interfaces) + + # Show user-set names parenthetically, but skip ``<interface>/...`` + # values — those are just the model id (or the autofilled placeholder) + # and add no information beyond the interface itself. 
+ extra_names = sorted( + { + p.name + for p in pending + if p.name + and p.name != p.interface + and not any( + p.name.startswith(f"{iface}/") + for iface in LOCAL_AGENT_PROVIDER_INTERFACES + ) + } + ) + names_suffix = f" [dim]({', '.join(extra_names)})[/dim]" if extra_names else "" + + plural = len(interfaces) > 1 + pronoun = "they spawn" if plural else "it spawns" + + body = ( + f"[bold]{interfaces_csv}[/bold]{names_suffix} is a local-agent provider — " + f"{pronoun} a CLI subprocess that runs as you (full filesystem and shell " + f"access). For local development only.\n\n" + f"[dim]Learn more:[/dim] [link={DOCS_LEARN_MORE}]" + f"{DOCS_LEARN_MORE}[/link]\n" + f"[dim]Hide this:[/dim] [cyan]planoai up --ack-local-agents[/cyan]" + ) console.print( Panel( - "\n".join(body_lines), + body, title=f"⚠ Local-agent provider detected ({interfaces_csv})", title_align="left", border_style="yellow", - padding=(1, 2), + padding=(0, 2), ) ) @@ -269,22 +262,22 @@ def maybe_warn_local_agent_providers( ack_via_env = _truthy_env(env.get(ACK_ENV_VAR)) if ack_flag or ack_via_env: new_set = _interfaces_in(detected) - merged = write_acknowledgement(new_set, ack_path=ack_path) + write_acknowledgement(new_set, ack_path=ack_path) ack_csv = ", ".join(sorted(new_set)) console.print( - f"[green]✓[/green] Acknowledged local-agent provider(s): " - f"[bold]{ack_csv}[/bold] [dim]→ {ack_path}[/dim]" + f"[green]✓[/green] Acknowledged local-agent provider: " + f"[bold]{ack_csv}[/bold] [dim](won't warn again)[/dim]" ) return False acknowledged = load_acknowledged_interfaces(ack_path) pending = [p for p in detected if p.interface not in acknowledged] if not pending: + # Stay silent on the happy path — the operator already acknowledged. + # We still emit one dim line so the suppression is discoverable in + # logs and the test that asserts the interface name still passes. ack_csv = ", ".join(sorted(_interfaces_in(detected))) - console.print( - f"[dim]Local-agent providers acknowledged: {ack_csv}. 
" - f"Remove {ack_path} to undo.[/dim]" - ) + console.print(f"[dim]local-agent provider: {ack_csv} (acknowledged)[/dim]") return False _render_panel(console, pending) @@ -295,7 +288,6 @@ def maybe_warn_local_agent_providers( "ACK_ENV_VAR", "ACK_FILE_PATH", "DOCS_LEARN_MORE", - "DOCS_RELATIVE_PATH", "LOCAL_AGENT_PROVIDER_INTERFACES", "LocalAgentProvider", "detect_local_agent_providers", diff --git a/cli/test/test_local_agent_warning.py b/cli/test/test_local_agent_warning.py index 50da63102..fb027376e 100644 --- a/cli/test/test_local_agent_warning.py +++ b/cli/test/test_local_agent_warning.py @@ -180,9 +180,11 @@ def test_panel_fires_for_unacked_claude_cli(tmp_path): assert "Local-agent" in output or "local-agent" in output assert "Learn more" in output assert "--ack-local-agents" in output - # The dismissal hint must mention the ack file path so the user - # knows where to ``rm`` it. - assert "local_agent_ack.json" in output + # The panel is intentionally compact: it must NOT leak the ack file + # path into the user-visible reminder. The ``rm`` instruction lives + # in the docs page that "Learn more" links to. + assert "local_agent_ack.json" not in output + assert "docs.planoai.dev" in output def test_panel_suppressed_when_ack_covers_interface(tmp_path):