From dd734def4cb2baf61fba187adb44247b9aea35ef Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 18:19:47 -0300 Subject: [PATCH 01/30] feat(approval-gate): layered policy rules with findLast evaluator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a first-class Rule/Ruleset/Action surface ported from opencode's Permission.evaluate. Rules are wildcard globs over the iii function id (pattern is "*" in v1 — the field is reserved for per-function pattern extractors in a follow-up). Evaluator semantics: findLast across flattened layers, so operators stack a permissive default with more-specific overrides without surgery on the base list. approval-gate - New rules module: Action (allow/deny/ask), Rule, Ruleset alias, wildcard_match (DP, no regex), evaluate(perm, pattern, IntoIterator of &Rule). 16 unit tests covering matching, layering, last-wins. - WorkerConfig.rules: Vec<Rule> (additive; existing InterceptorRule flow unchanged). - apply_policy_rules: pure helper that maps a rule hit to PolicyOutcome (Allow / Deny{rule_permission, rule_pattern} / FallThrough). Allow short-circuits to pass-through; Deny short-circuits with a Denial::Policy that names the matched rule; Ask and no-match fall through to the existing per-function interceptor flow. - handle_subscriber consults the rules layer first, before decide_intercept_action. 
--- approval-gate/src/config.rs | 35 +++++ approval-gate/src/lib.rs | 142 +++++++++++++++++++ approval-gate/src/rules.rs | 271 ++++++++++++++++++++++++++++++++++++ 3 files changed, 448 insertions(+) create mode 100644 approval-gate/src/rules.rs diff --git a/approval-gate/src/config.rs b/approval-gate/src/config.rs index e7cb6942..33f6b0c5 100644 --- a/approval-gate/src/config.rs +++ b/approval-gate/src/config.rs @@ -19,8 +19,18 @@ fn default_classifier_timeout_ms() -> u64 { 2000 } +fn default_sweeper_interval_ms() -> u64 { + 2000 +} + /// Per-function iii intercept rule: optional classifier trigger before pending + /// optional `__from_approval` injection on post-resolve `iii.trigger`. +/// +/// `marker_target_verified` is the operator's explicit assertion that the +/// `function_id` target validates `__from_approval` against +/// `approval::lookup_record` on every invocation. When `inject_approval_marker` +/// is true, [`crate::register`] refuses to start unless this flag is also +/// true — closing the honor-system gap. #[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] pub struct InterceptorRule { pub function_id: String, @@ -30,6 +40,8 @@ pub struct InterceptorRule { pub classifier_timeout_ms: u64, #[serde(default)] pub inject_approval_marker: bool, + #[serde(default)] + pub marker_target_verified: bool, } #[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] @@ -40,8 +52,16 @@ pub struct WorkerConfig { pub approval_state_scope: String, #[serde(default = "default_default_timeout_ms")] pub default_timeout_ms: u64, + #[serde(default = "default_sweeper_interval_ms")] + pub sweeper_interval_ms: u64, #[serde(default)] pub interceptors: Vec, + /// Layered permission rules consulted before per-function interceptors. + /// `Allow` short-circuits to pass-through; `Deny` short-circuits to a + /// policy [`crate::Denial`]; `Ask` (and no-match) falls through to the + /// existing [`InterceptorRule`] flow. See [`crate::rules`]. 
+ #[serde(default)] + pub rules: crate::rules::Ruleset, } impl Default for WorkerConfig { @@ -50,7 +70,9 @@ impl Default for WorkerConfig { topic: default_topic(), approval_state_scope: default_approval_state_scope(), default_timeout_ms: default_default_timeout_ms(), + sweeper_interval_ms: default_sweeper_interval_ms(), interceptors: Vec::new(), + rules: Vec::new(), } } } @@ -76,9 +98,22 @@ mod tests { assert_eq!(cfg.topic, default_topic()); assert_eq!(cfg.approval_state_scope, "approvals"); assert_eq!(cfg.default_timeout_ms, 300_000); + assert_eq!(cfg.sweeper_interval_ms, 2000); assert!(cfg.interceptors.is_empty()); } + #[test] + fn marker_target_verified_defaults_false() { + let yaml = r#" +interceptors: + - function_id: shell::exec + inject_approval_marker: true +"#; + let cfg: WorkerConfig = serde_yaml::from_str(yaml).unwrap(); + assert!(cfg.interceptors[0].inject_approval_marker); + assert!(!cfg.interceptors[0].marker_target_verified); + } + #[test] fn interceptors_default_empty() { assert!(WorkerConfig::default().interceptors.is_empty()); diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 19616766..8d707f42 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -4,6 +4,7 @@ pub mod config; pub mod manifest; +pub mod rules; pub use config::{InterceptorRule, WorkerConfig}; @@ -93,6 +94,37 @@ fn merge_from_approval_marker_if_needed( } } +/// Outcome of the policy-rules pre-check that runs before the per-function +/// [`config::InterceptorRule`] flow. `Allow` and `Deny` short-circuit the +/// subscriber with a final reply; `FallThrough` defers to the existing +/// interceptor logic (classifier or pause). +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum PolicyOutcome { + Allow, + Deny { + rule_permission: String, + rule_pattern: String, + }, + FallThrough, +} + +/// Apply the layered policy rules to an incoming function id. Pure +/// function — no I/O, no clock. 
Extracted from [`register`]'s subscriber +/// closure so the decision branch can be unit-tested independently. +pub(crate) fn apply_policy_rules(rules: &rules::Ruleset, function_id: &str) -> PolicyOutcome { + match rules::evaluate(function_id, "*", rules) { + Some(rule) => match rule.action { + rules::Action::Allow => PolicyOutcome::Allow, + rules::Action::Deny => PolicyOutcome::Deny { + rule_permission: rule.permission.clone(), + rule_pattern: rule.pattern.clone(), + }, + rules::Action::Ask => PolicyOutcome::FallThrough, + }, + None => PolicyOutcome::FallThrough, + } +} + /// Structured deny payload carried on wire replies, persisted records, and /// `approval_resolved` stream events. Replaces the legacy free-form /// `decision_reason` / `reason` strings so consumers (turn-orchestrator @@ -1221,6 +1253,9 @@ pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let rules: Arc> = Arc::new(cfg.interceptors.clone()); + // Layered policy rules consulted before the per-function interceptor + // flow. See [`crate::rules`]. + let policy_rules: Arc = Arc::new(cfg.rules.clone()); // Fail fast on honor-system markers: any interceptor that asks the gate // to inject `__from_approval` MUST also assert the target validates it. 
@@ -1471,6 +1506,7 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let bus_for_sub = bus.clone(); let subscriber_scope = state_scope.clone(); let rules_for_sub = rules.clone(); + let policy_rules_for_sub = policy_rules.clone(); let subscriber_fn = iii.register_function(( RegisterFunctionMessage::with_id("policy::approval_gate".into()) .with_description("Pause function calls listed in approval_required.".into()), @@ -1479,6 +1515,7 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let bus = bus_for_sub.clone(); let sc = subscriber_scope.clone(); let intercept_rules = rules_for_sub.clone(); + let policy_rules = policy_rules_for_sub.clone(); async move { let Some(call) = extract_call(&envelope) else { return Ok::<_, IIIError>(json!({ "block": false })); @@ -1488,6 +1525,34 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { .map(|d| d.as_millis() as u64) .unwrap_or(0); + // Layered policy rules run first. Allow / Deny short-circuit; + // Ask (and no-match) falls through to the existing per-function + // interceptor flow. Pattern is "*" in v1 — see `crate::rules`. 
+ match apply_policy_rules(policy_rules.as_ref(), &call.function_id) { + PolicyOutcome::Allow => { + return Ok::<_, IIIError>(json!({ "block": false })); + } + PolicyOutcome::Deny { + rule_permission, + rule_pattern, + } => { + let denial = Denial::Policy { + classifier_reason: format!( + "rule {rule_permission} {rule_pattern} denies" + ), + classifier_fn: "approval-gate::rules".to_string(), + }; + return Ok::<_, IIIError>(json!({ + "block": true, + "denial": denial, + "status": "denied", + "call_id": call.function_call_id, + "function_id": call.function_id, + })); + } + PolicyOutcome::FallThrough => {} + } + let action = decide_intercept_action( rule_for(intercept_rules.as_slice(), &call.function_id), call.requires_approval(), @@ -2423,6 +2488,83 @@ mod tests { assert_eq!(decide_intercept_action(None, false), InterceptAction::Pass); } + #[test] + fn apply_policy_rules_empty_ruleset_falls_through() { + let rs: rules::Ruleset = vec![]; + assert_eq!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::FallThrough + ); + } + + #[test] + fn apply_policy_rules_allow_short_circuits() { + let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Allow, + }]; + assert_eq!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::Allow + ); + } + + #[test] + fn apply_policy_rules_deny_carries_matched_rule_identity() { + let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::*".into(), + pattern: "*".into(), + action: rules::Action::Deny, + }]; + assert_eq!( + apply_policy_rules(&rs, "shell::fs::write"), + PolicyOutcome::Deny { + rule_permission: "shell::*".into(), + rule_pattern: "*".into(), + } + ); + } + + #[test] + fn apply_policy_rules_ask_falls_through_to_interceptor_flow() { + // Ask means "no decision from this layer — let the next handle it". 
+ let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Ask, + }]; + assert_eq!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::FallThrough + ); + } + + #[test] + fn apply_policy_rules_last_matching_wins() { + // Later-listed more-specific rule overrides earlier permissive default. + let rs: rules::Ruleset = vec![ + rules::Rule { + permission: "*".into(), + pattern: "*".into(), + action: rules::Action::Allow, + }, + rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Deny, + }, + ]; + assert!(matches!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::Deny { .. } + )); + assert_eq!( + apply_policy_rules(&rs, "approval::resolve"), + PolicyOutcome::Allow + ); + } + #[test] fn decide_intercept_action_classifier_empty_string_treated_as_no_classifier() { let rule = InterceptorRule { diff --git a/approval-gate/src/rules.rs b/approval-gate/src/rules.rs new file mode 100644 index 00000000..eb167f63 --- /dev/null +++ b/approval-gate/src/rules.rs @@ -0,0 +1,271 @@ +//! Layered permission rules — first-class policy primitive ported from +//! opencode's `Permission.evaluate` / `Wildcard.match`. +//! +//! ## Shape +//! +//! A [`Rule`] pairs a permission glob (matched against the iii function id) +//! with a pattern glob (matched against a caller-supplied pattern string, +//! always `"*"` in v1 — see [`evaluate`] for the forward-compatible call +//! shape). An [`Action`] tells the gate what to do on match: +//! [`Action::Allow`] passes the call through, [`Action::Deny`] short-circuits +//! with a policy [`crate::Denial`], [`Action::Ask`] falls back to the existing +//! per-function [`crate::config::InterceptorRule`] flow. +//! +//! ## Layering +//! +//! Operators stack rules — a workspace-default ruleset, plus a per-session +//! override, plus an operator-pinned global. [`evaluate`] flattens N +//! 
rulesets in caller order and returns the **last** matching rule. +//! Last-wins is the standard policy-stacking semantic: a more-specific +//! later layer overrides an earlier permissive default without surgery on +//! the earlier list. +//! +//! ## Wildcard match +//! +//! [`wildcard_match`] supports `*` (zero or more of any character) and +//! literal text. No regex, no `?`, no character classes — the surface is +//! intentionally tiny to match opencode's `Wildcard.match` behaviour and +//! keep the rule language operator-readable. `*` is greedy via dynamic +//! programming so `"a*b*c"` matches `"axxxbxxxc"` correctly. + +use serde::{Deserialize, Serialize}; + +/// Decision a [`Rule`] expresses when it matches an incoming call. +/// +/// Wire format is the lowercase string `"allow"` | `"deny"` | `"ask"` so +/// rules are operator-readable in YAML / JSON config. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Action { + Allow, + Deny, + Ask, +} + +/// A single permission rule. +/// +/// `permission` is matched against the iii function id (e.g. `shell::exec`, +/// `shell::fs::*`). `pattern` is matched against a caller-supplied pattern +/// string; in v1 every call site passes `"*"`, so `pattern: "*"` is the +/// only useful value today. The field is kept on the type so the forward +/// path to per-function pattern extractors (shell::exec → joined argv, +/// shell::fs::* → path) is a config-level change, not a schema break. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Rule { + /// Wildcard pattern matched against the iii function id. + pub permission: String, + /// Wildcard pattern matched against a caller-supplied pattern string. + /// In v1 callers pass `"*"`; setting `pattern: "*"` here matches them. + pub pattern: String, + pub action: Action, +} + +/// A list of rules, evaluated in order. 
Stacked rulesets are flattened by +/// [`evaluate`] in caller order so the **last** matching rule across all +/// layers wins. +pub type Ruleset = Vec; + +/// True if `text` matches the wildcard `pattern`. Supports `*` (zero or +/// more of any character) and literal text. Tiny on purpose — operators +/// should be able to read a rule and know what it matches without a regex +/// engine in their head. +/// +/// Dynamic-programming implementation so `"a*b*c"` matches `"axxxbxxxc"` +/// without exponential backtracking on patterns with many `*`. +pub fn wildcard_match(pattern: &str, text: &str) -> bool { + let p: Vec = pattern.chars().collect(); + let t: Vec = text.chars().collect(); + let (np, nt) = (p.len(), t.len()); + // dp[i][j] = true iff p[..i] matches t[..j]. + let mut dp = vec![vec![false; nt + 1]; np + 1]; + dp[0][0] = true; + // A leading run of '*' can match the empty string. + for i in 1..=np { + if p[i - 1] == '*' { + dp[i][0] = dp[i - 1][0]; + } + } + for i in 1..=np { + for j in 1..=nt { + dp[i][j] = if p[i - 1] == '*' { + // '*' matches empty (dp[i-1][j]) or extends by one char (dp[i][j-1]). + dp[i - 1][j] || dp[i][j - 1] + } else { + p[i - 1] == t[j - 1] && dp[i - 1][j - 1] + }; + } + } + dp[np][nt] +} + +/// Find the **last** rule in `rules` whose `permission` and `pattern` +/// both wildcard-match the given inputs. Takes any iterator of rule +/// references so callers can pass a single [`Ruleset`] directly +/// (`&Vec` is `IntoIterator`) or chain several layers +/// via `global.iter().chain(session.iter())` without temporary borrows. +/// Returns the matched rule by reference so the caller can read its +/// [`Action`] and report the matching pattern in audit / Denial detail. +/// +/// `None` means no rule matched — the caller should fall back to whatever +/// it would do without a rules layer (in approval-gate: the existing +/// per-function [`crate::config::InterceptorRule`] path). 
+pub fn evaluate<'a, I>(permission: &str, pattern: &str, rules: I) -> Option<&'a Rule> +where + I: IntoIterator, +{ + rules + .into_iter() + .filter(|r| wildcard_match(&r.permission, permission) && wildcard_match(&r.pattern, pattern)) + .last() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn r(permission: &str, pattern: &str, action: Action) -> Rule { + Rule { + permission: permission.to_string(), + pattern: pattern.to_string(), + action, + } + } + + #[test] + fn wildcard_literal_match() { + assert!(wildcard_match("shell::exec", "shell::exec")); + assert!(!wildcard_match("shell::exec", "shell::fs::read")); + } + + #[test] + fn wildcard_star_matches_empty() { + assert!(wildcard_match("*", "")); + assert!(wildcard_match("*", "anything")); + } + + #[test] + fn wildcard_star_matches_prefix() { + assert!(wildcard_match("shell::*", "shell::exec")); + assert!(wildcard_match("shell::*", "shell::fs::write")); + assert!(!wildcard_match("shell::*", "approval::resolve")); + } + + #[test] + fn wildcard_star_matches_suffix_and_middle() { + assert!(wildcard_match("*::exec", "shell::exec")); + assert!(wildcard_match("shell::*::write", "shell::fs::write")); + assert!(!wildcard_match("shell::*::write", "shell::fs::read")); + } + + #[test] + fn wildcard_multiple_stars_no_backtracking_blowup() { + // The dp implementation must not blow up on many '*'. 
+ let pat = "*a*a*a*a*a*a*a*a*a*a*a*a*a*b"; + let text: String = "a".repeat(50); + assert!(!wildcard_match(pat, &text)); + let text_ok: String = format!("{}b", "a".repeat(50)); + assert!(wildcard_match(pat, &text_ok)); + } + + #[test] + fn evaluate_returns_none_for_empty_ruleset() { + let empty: Ruleset = vec![]; + assert!(evaluate("shell::exec", "*", &empty).is_none()); + } + + #[test] + fn evaluate_returns_none_when_nothing_matches() { + let rs: Ruleset = vec![r("approval::*", "*", Action::Allow)]; + assert!(evaluate("shell::exec", "*", &rs).is_none()); + } + + #[test] + fn evaluate_matches_exact_permission() { + let rs: Ruleset = vec![r("shell::exec", "*", Action::Allow)]; + let m = evaluate("shell::exec", "*", &rs).expect("match"); + assert_eq!(m.action, Action::Allow); + } + + #[test] + fn evaluate_matches_wildcard_permission() { + let rs: Ruleset = vec![r("shell::*", "*", Action::Allow)]; + let m = evaluate("shell::fs::write", "*", &rs).expect("match"); + assert_eq!(m.action, Action::Allow); + } + + #[test] + fn evaluate_last_wins_within_single_ruleset() { + // Two matching rules in the same ruleset; the later one wins. + let rs: Ruleset = vec![ + r("shell::*", "*", Action::Allow), + r("shell::exec", "*", Action::Deny), + ]; + let m = evaluate("shell::exec", "*", &rs).expect("match"); + assert_eq!( + m.action, + Action::Deny, + "more-specific later rule must override earlier permissive default" + ); + } + + #[test] + fn evaluate_last_wins_across_layered_rulesets() { + // global allows everything; session denies shell::exec. Session + // (passed last) overrides global. + let global: Ruleset = vec![r("*", "*", Action::Allow)]; + let session: Ruleset = vec![r("shell::exec", "*", Action::Deny)]; + let m = evaluate( + "shell::exec", + "*", + global.iter().chain(session.iter()), + ) + .expect("match"); + assert_eq!(m.action, Action::Deny); + + // For a permission only matched by global, global still wins. 
+ let m2 = evaluate( + "approval::resolve", + "*", + global.iter().chain(session.iter()), + ) + .expect("match"); + assert_eq!(m2.action, Action::Allow); + } + + #[test] + fn evaluate_ask_is_a_valid_action() { + let rs: Ruleset = vec![r("shell::exec", "*", Action::Ask)]; + let m = evaluate("shell::exec", "*", &rs).expect("match"); + assert_eq!(m.action, Action::Ask); + } + + #[test] + fn evaluate_pattern_matches_when_both_globs_pass() { + let rs: Ruleset = vec![r("shell::exec", "git*", Action::Allow)]; + // pattern matches + let m = evaluate("shell::exec", "git checkout main", &rs).expect("match"); + assert_eq!(m.action, Action::Allow); + // pattern doesn't match → no rule selected + assert!(evaluate("shell::exec", "rm -rf /", &rs).is_none()); + } + + #[test] + fn rule_serde_round_trip() { + let original = r("shell::exec", "*", Action::Deny); + let json = serde_json::to_value(&original).unwrap(); + assert_eq!(json["permission"], "shell::exec"); + assert_eq!(json["pattern"], "*"); + assert_eq!(json["action"], "deny"); + let back: Rule = serde_json::from_value(json).unwrap(); + assert_eq!(back, original); + } + + #[test] + fn action_yaml_round_trip() { + for a in [Action::Allow, Action::Deny, Action::Ask] { + let y = serde_yaml::to_string(&a).unwrap(); + let back: Action = serde_yaml::from_str(&y).unwrap(); + assert_eq!(back, a); + } + } +} From aa9c4bfc846c636249b2bf3b959796d121e4c8c7 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 18:34:57 -0300 Subject: [PATCH 02/30] feat(approval-gate): cascade allow on `always: true` resolve reply When a user resolves a pending call with decision=allow + always=true, approval-gate now pushes a runtime Allow rule for the call's function id and sweeps the same session's pending records, auto-resolving every other one the new rule covers. One click instead of N. approval-gate - approve_and_execute() extracted from handle_resolve's allow branch as a reusable async helper. 
Both the user-driven allow path and the cascade sweep drive state through the same transitions (approved -> invoke -> executed | failed). - Shared policy ruleset wrapped as Arc<RwLock<Ruleset>> so reply-time mutation is safe across the subscriber + resolve closures. Read guards are scoped so they never cross an .await (std::sync::RwLock is not async-safe to hold across suspension). - cascade_allow_for_session() pushes the new rule under the write lock, snapshots session pending via list_prefix, and runs approve_and_execute for each newly-Allow record. The originator is skipped (already resolved above). Per-record state-write failures are logged and the rest of the cascade continues. - handle_resolve response carries `cascaded: N` when the sweep resolved at least one extra record; omitted otherwise so the one-shot path stays unchanged. Cascade scope is intentionally narrow for v1: - function-id-only matching (pattern "*"), mirrors the v1 rules surface - same session only - in-memory rule (no cross-restart persistence) - allow + always; deny + always is out of scope Five new tests cover the cascade decision tree (no-always, same-session match, cross-session non-effect, originator-skip, terminal-record skip). 
--- approval-gate/src/lib.rs | 511 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 472 insertions(+), 39 deletions(-) diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 8d707f42..60cda0f9 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -8,7 +8,7 @@ pub mod rules; pub use config::{InterceptorRule, WorkerConfig}; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use iii_sdk::{ FunctionRef, IIIError, RegisterFunctionMessage, RegisterTriggerInput, TriggerRequest, III, @@ -661,6 +661,7 @@ pub async fn handle_resolve( bus: &dyn StateBus, exec: &dyn FunctionExecutor, state_scope: &str, + policy_rules: &RwLock, payload: Value, now_ms: u64, ) -> Value { @@ -723,36 +724,183 @@ pub async fn handle_resolve( json!({ "ok": true }) } WireDecision::Allow => { - let function_id = existing - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - let args = existing.get("args").cloned().unwrap_or(json!({})); - let approved = transition_record(&existing, "approved", None, None, None); - // Best-effort intermediate write; if it fails, still try to invoke. 
- let _ = bus.set(state_scope, &key, approved.clone()).await; - match exec - .invoke(&function_id, args, function_call_id, session_id) - .await + if let Err(err) = approve_and_execute( + bus, + exec, + state_scope, + &existing, + session_id, + function_call_id, + ) + .await { - Ok(result) => { - let executed = - transition_record(&approved, "executed", Some(result), None, None); - if let Err(e) = bus.set(state_scope, &key, executed).await { - tracing::error!("approval-gate: failed to write executed record: {e}"); - return json!({ "ok": false, "error": "state_write_failed" }); - } - } - Err(error) => { - let failed = transition_record(&approved, "failed", None, Some(error), None); - if let Err(e) = bus.set(state_scope, &key, failed).await { - tracing::error!("approval-gate: failed to write failed record: {e}"); - return json!({ "ok": false, "error": "state_write_failed" }); - } - } + tracing::error!( + "approval-gate: failed to execute approved call: {err}" + ); + return json!({ "ok": false, "error": "state_write_failed" }); } - json!({ "ok": true }) + + // Optional cascade: when `always: true` is set on an allow + // reply, add a runtime Allow rule for this call's function id + // and resolve every other pending record in the same session + // that the new rule covers. v1 scope is function-id-only — + // the cascade rule's `pattern` is "*" to match the v1 rules + // surface. See [`crate::rules`]. 
+ let cascaded = if payload + .get("always") + .and_then(Value::as_bool) + .unwrap_or(false) + { + let function_id = existing + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + cascade_allow_for_session( + bus, + exec, + state_scope, + policy_rules, + session_id, + function_call_id, + &function_id, + ) + .await + } else { + 0 + }; + + if cascaded > 0 { + json!({ "ok": true, "cascaded": cascaded }) + } else { + json!({ "ok": true }) + } + } + } +} + +/// Push an Allow rule for `function_id` into the shared policy ruleset, +/// then resolve every pending record in `session_id` (other than the one +/// just resolved by the caller) that the new rule covers. Returns the +/// number of records auto-resolved. +/// +/// The function id rule is appended once; if the user clicks "always +/// allow X" twice for the same X within a session, the second push is a +/// duplicate but harmless (last-wins still picks Allow). State-write +/// failures inside the loop are logged and skipped so a single bad +/// record can't prevent the rest of the cascade. +async fn cascade_allow_for_session( + bus: &dyn StateBus, + exec: &dyn FunctionExecutor, + state_scope: &str, + policy_rules: &RwLock, + session_id: &str, + originator_call_id: &str, + originator_function_id: &str, +) -> u64 { + // Push the new Allow rule under the write lock. Hold the guard only + // for the mutation, not across the .await in the sweep below. + { + let mut guard = policy_rules + .write() + .expect("approval-gate policy rules lock poisoned"); + guard.push(rules::Rule { + permission: originator_function_id.to_string(), + pattern: "*".to_string(), + action: rules::Action::Allow, + }); + } + + // Snapshot the session's pending records and re-evaluate each one + // against the now-updated rules. Use a read-clone so we don't hold + // the lock across .await. 
+ let prefix = format!("{session_id}/"); + let session_records = bus.list_prefix(state_scope, &prefix).await; + let mut cascaded = 0u64; + for rec in session_records { + let rec_call_id = match rec.get("function_call_id").and_then(Value::as_str) { + Some(s) => s.to_string(), + None => continue, + }; + if rec_call_id == originator_call_id { + continue; + } + if rec.get("status").and_then(Value::as_str) != Some("pending") { + continue; + } + let fn_id = rec + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let outcome = { + let guard = policy_rules + .read() + .expect("approval-gate policy rules lock poisoned"); + apply_policy_rules(&guard, &fn_id) + }; + if !matches!(outcome, PolicyOutcome::Allow) { + continue; + } + if let Err(err) = + approve_and_execute(bus, exec, state_scope, &rec, session_id, &rec_call_id).await + { + tracing::warn!( + session_id, + call_id = %rec_call_id, + "approval-gate: cascade auto-resolve failed: {err}" + ); + continue; + } + cascaded += 1; + } + cascaded +} + +/// Drive a pending record through the approved → invoke → executed/failed +/// flow. Pure plumbing — does not consult policy rules, does not check +/// the original status (caller must have verified it's pending). Used by +/// both the user-driven [`handle_resolve`] allow path and the +/// cascade-on-`always` sweep so the state transitions stay in one place. +/// +/// Returns `Err` only when a state write fails; the invocation result +/// itself (success or function-error) is captured on the record. The +/// caller decides how to surface a state-write failure (the existing +/// handlers map it to `{ok:false, error:"state_write_failed"}`). 
+pub(crate) async fn approve_and_execute( + bus: &dyn StateBus, + exec: &dyn FunctionExecutor, + state_scope: &str, + pending: &Value, + session_id: &str, + function_call_id: &str, +) -> Result<(), String> { + let function_id = pending + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let args = pending.get("args").cloned().unwrap_or(json!({})); + let key = pending_key(session_id, function_call_id); + let approved = transition_record(pending, "approved", None, None, None); + // Best-effort intermediate write; if it fails we still try to invoke + // so the user-visible behavior matches the pre-extraction allow path. + let _ = bus.set(state_scope, &key, approved.clone()).await; + match exec + .invoke(&function_id, args, function_call_id, session_id) + .await + { + Ok(result) => { + let executed = transition_record(&approved, "executed", Some(result), None, None); + bus.set(state_scope, &key, executed) + .await + .map_err(|e| e.to_string()) + } + Err(error) => { + let failed = transition_record(&approved, "failed", None, Some(error), None); + bus.set(state_scope, &key, failed) + .await + .map_err(|e| e.to_string()) } } } @@ -1254,8 +1402,11 @@ pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let rules: Arc> = Arc::new(cfg.interceptors.clone()); // Layered policy rules consulted before the per-function interceptor - // flow. See [`crate::rules`]. - let policy_rules: Arc = Arc::new(cfg.rules.clone()); + // flow. Wrapped in RwLock so a user reply with `always: true` on + // `approval::resolve` can push a new Allow rule at runtime (see the + // cascade in `handle_resolve`). See [`crate::rules`]. + let policy_rules: Arc> = + Arc::new(RwLock::new(cfg.rules.clone())); // Fail fast on honor-system markers: any interceptor that asks the gate // to inject `__from_approval` MUST also assert the target validates it. 
@@ -1300,11 +1451,14 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { rules: rules.clone(), }); let iii_for_resolve = iii.clone(); + let policy_rules_for_resolve = policy_rules.clone(); let resolve = iii.register_function(( RegisterFunctionMessage::with_id(FN_RESOLVE.into()).with_description( "Resolve a pending approval. On allow, invokes the underlying function; \ - on deny, records the denial. The result is stitched into the agent's \ - next turn as a system message." + on deny, records the denial. With `always: true` on an allow reply, \ + a runtime rule is added so future calls to this function id auto-allow, \ + and the session's other pending calls newly matching are cascade-resolved. \ + The result is stitched into the agent's next turn as a system message." .into(), ), move |payload: Value| { @@ -1312,6 +1466,7 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let exec = exec_for_resolve.clone(); let scope_resolve = scope_resolve.clone(); let iii = iii_for_resolve.clone(); + let policy_rules = policy_rules_for_resolve.clone(); async move { let now_ms = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -1321,6 +1476,7 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { bus.as_ref(), exec.as_ref(), &scope_resolve, + &policy_rules, payload.clone(), now_ms, ) @@ -1528,7 +1684,16 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { // Layered policy rules run first. Allow / Deny short-circuit; // Ask (and no-match) falls through to the existing per-function // interceptor flow. Pattern is "*" in v1 — see `crate::rules`. - match apply_policy_rules(policy_rules.as_ref(), &call.function_id) { + // Read-lock is acquired and dropped inside a block so the + // guard never crosses an `.await` (std::sync::RwLock is not + // async-safe to hold across suspension points). 
+ let policy_outcome = { + let guard = policy_rules + .read() + .expect("approval-gate policy rules lock poisoned"); + apply_policy_rules(&guard, &call.function_id) + }; + match policy_outcome { PolicyOutcome::Allow => { return Ok::<_, IIIError>(json!({ "block": false })); } @@ -1660,6 +1825,13 @@ mod tests { use super::*; use serde_json::json; + /// Empty policy ruleset for tests that exercise [`handle_resolve`] + /// without cascading. Each call freshly constructs the lock so unit + /// tests stay independent — there's no shared mutable state. + fn empty_policy_rules() -> std::sync::RwLock { + std::sync::RwLock::new(crate::rules::Ruleset::new()) + } + #[test] fn maybe_flip_timed_out_returns_some_when_pending_and_expired() { let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); @@ -2246,6 +2418,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), 70_000, ) @@ -2894,6 +3067,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": "s1", "function_call_id": "tc-1", @@ -2919,6 +3093,249 @@ mod tests { assert_eq!(rec["result"], json!({"ok": true})); } + #[tokio::test] + async fn allow_without_always_does_not_cascade() { + // Two pending shell::exec calls in the same session. Resolving + // the first with allow (always=false) must NOT touch the second. 
+ let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for cid in ["tc-1", "tc-2"] { + let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert!( + resp.get("cascaded").is_none(), + "cascaded field must be omitted when always was not set: {resp}" + ); + let other = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-2")) + .await + .unwrap(); + assert_eq!(other["status"], "pending"); + assert_eq!(rules.read().unwrap().len(), 0, "rule must not be pushed"); + } + + #[tokio::test] + async fn allow_with_always_pushes_rule_and_cascades_same_session_pending() { + // Three pending calls in session s1: two shell::exec, one + // shell::fs::write. Resolving the first shell::exec with + // always=true must: + // 1. Push an Allow rule for shell::exec + // 2. Auto-resolve the other shell::exec pending in this session + // 3. 
Leave the shell::fs::write pending untouched + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for (cid, fn_id) in [ + ("tc-1", "shell::exec"), + ("tc-2", "shell::exec"), + ("tc-3", "shell::fs::write"), + ] { + let mut rec = build_pending_record(cid, fn_id, &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert_eq!( + resp["cascaded"], json!(1), + "tc-2 should cascade; tc-1 originator excluded; tc-3 not matched" + ); + + // The Allow rule for shell::exec is now in the shared ruleset. + let pushed = rules.read().unwrap(); + assert_eq!(pushed.len(), 1); + assert_eq!(pushed[0].permission, "shell::exec"); + assert_eq!(pushed[0].action, rules::Action::Allow); + drop(pushed); + + // Originator and cascaded record both transitioned to executed. + let r1 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + let r2 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-2")) + .await + .unwrap(); + let r3 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-3")) + .await + .unwrap(); + assert_eq!(r1["status"], "executed"); + assert_eq!(r2["status"], "executed"); + assert_eq!( + r3["status"], "pending", + "non-matching function_id must stay pending: {r3}" + ); + + // Executor was invoked twice: originator + cascaded. + assert_eq!(exec.calls.lock().unwrap().len(), 2); + } + + #[tokio::test] + async fn cascade_does_not_cross_session_boundary() { + // tc-1 in session s1, tc-2 in session s2 — both shell::exec. + // Resolving s1/tc-1 with always must not touch s2/tc-2. 
+ let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for (session, cid) in [("s1", "tc-1"), ("s2", "tc-2")] { + let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!(session)); + bus.set(STATE_SCOPE, &pending_key(session, cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert!( + resp.get("cascaded").is_none() || resp["cascaded"] == json!(0), + "no record in s1 to cascade onto; tc-2 in s2 must NOT be touched: {resp}" + ); + + let other_session = bus + .get(STATE_SCOPE, &pending_key("s2", "tc-2")) + .await + .unwrap(); + assert_eq!(other_session["status"], "pending"); + assert_eq!( + exec.calls.lock().unwrap().len(), + 1, + "only the originator should have been invoked" + ); + } + + #[tokio::test] + async fn cascade_skips_originator_record() { + // Single pending record. always=true must not double-resolve it. + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let mut rec = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) + .await + .unwrap(); + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + // Originator counts under the existing allow path, not the cascade. 
+ assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); + assert_eq!(exec.calls.lock().unwrap().len(), 1); + } + + #[tokio::test] + async fn cascade_skips_already_resolved_records_in_session() { + // Two records in s1: tc-1 pending, tc-2 already terminal. The + // cascade must skip tc-2. + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let mut r1 = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); + r1.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), r1) + .await + .unwrap(); + let mut r2 = build_pending_record("tc-2", "shell::exec", &json!({}), 1_000, 60_000); + r2.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + let r2_done = transition_record(&r2, "executed", Some(json!({"ok": true})), None, None); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), r2_done) + .await + .unwrap(); + + let rules = empty_policy_rules(); + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + // tc-2 is terminal — not pending — so cascade skips it. 
+ assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); + } + #[tokio::test] async fn handle_resolve_deny_does_not_invoke_function() { let bus = InMemoryStateBus::new(); @@ -2935,6 +3352,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": "s1", "function_call_id": "tc-1", @@ -2977,6 +3395,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), 1_500, ) @@ -3059,6 +3478,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "function_call_id": "tc-1", "session_id": "s1", @@ -3092,6 +3512,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "tool_call_id": "tc-1", "session_id": "s1", @@ -3118,6 +3539,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({"function_call_id": "tc-1", "session_id": "s1", "decision": "deny"}), 1_500, ) @@ -3171,6 +3593,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": "s1", "function_call_id": "tc-1", @@ -3205,6 +3628,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": "s1", "function_call_id": "tc-1", @@ -3635,6 +4059,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({"session_id": "", "function_call_id": "c", "decision": "allow"}), 0, ) @@ -3644,6 +4069,7 @@ mod tests { &bus, &exec, STATE_SCOPE, + &empty_policy_rules(), json!({"session_id": "s", "function_call_id": "", "decision": "allow"}), 0, ) @@ -3938,26 +4364,33 @@ mod tests { } Op::ResolveAllow => { let _ = handle_resolve( - &bus, &exec, STATE_SCOPE, + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": session_id, "function_call_id": call_id, "decision": "allow", }), now_ms, - ).await; + ) + .await; } Op::ResolveDeny => { let _ = handle_resolve( - &bus, &exec, STATE_SCOPE, + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), json!({ "session_id": 
session_id, "function_call_id": call_id, "decision": "deny", - "reason": "user", }), now_ms, - ).await; + ) + .await; } Op::AdvanceClockAndLazyFlip => { now_ms = now_ms.saturating_add(timeout_ms + 1); From 04bb1d4e5e39557db91e848204c80f9781f22960 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:04:31 -0300 Subject: [PATCH 03/30] chore(approval-gate): remove legacy migration surface The Denial refactor replaced decision_reason with a typed structure, but several legacy hooks remained in place. They are now removed because the underlying schemas they protected against (pre-trigger-model status strings, pre-Denial flat decision_reason field) cannot occur in any state store this branch is wired to. approval-gate - Drop migrate_legacy_record entirely. The pre-trigger-model status="allow"/"deny" rename and the decision_reason -> Denial::Legacy lift both go away. Old persisted records (if any) now surface as filtered-out orphans rather than silently mistranslated. - Drop Denial::Legacy variant from the enum. - Drop legacy_migrated flag from records. - Drop the four migrate_legacy_record_* unit tests and the handle_list_undelivered_persists_migrated_legacy_record integration test. - handle_list_pending no longer pre-filters legacy shapes; orphan records lacking a session_id stamp are dropped uniformly. - timeout_resolved_event drops the decision_reason: "timeout" field (status: "timed_out" is self-describing). - handle_sweep_session drops the optional `reason` payload field. The cause (session_deleted vs run_stopped) is the caller's concern; the swept timed_out records carry no Denial. - skills/sweep_session.md updated. turn-orchestrator - approval_stitching drops the legacy denial-kind branch and the legacy_migrated note line; the corresponding stitch test is removed. - run_stop no longer forwards reason: "run_stopped" on the approval::sweep_session payload; it logs the cause locally instead. 
- The run_stop integration test now asserts the absence of the reason field on the sweep payload. harness-types (6 vendored copies, byte-identical) - Drop the Legacy variant and its docblock line from each agent_event.rs copy. --- approval-gate/skills/sweep_session.md | 5 +- approval-gate/src/lib.rs | 281 +++-------------- .../crates/harness-types/src/agent_event.rs | 4 - .../crates/harness-types/src/agent_event.rs | 4 - .../crates/harness-types/src/agent_event.rs | 4 - .../crates/harness-types/src/agent_event.rs | 4 - .../crates/harness-types/src/agent_event.rs | 4 - .../crates/harness-types/src/agent_event.rs | 4 - turn-orchestrator/src/run_stop.rs | 136 +++++++++ .../src/states/approval_stitching.rs | 25 -- turn-orchestrator/tests/run_stop.rs | 287 ++++++++++++++++++ 11 files changed, 472 insertions(+), 286 deletions(-) create mode 100644 turn-orchestrator/src/run_stop.rs create mode 100644 turn-orchestrator/tests/run_stop.rs diff --git a/approval-gate/skills/sweep_session.md b/approval-gate/skills/sweep_session.md index 9dc67c66..e791db36 100644 --- a/approval-gate/skills/sweep_session.md +++ b/approval-gate/skills/sweep_session.md @@ -1,6 +1,6 @@ # approval::sweep_session -Sweep all pending approval records for a session to `timed_out` with reason `session_deleted`. +Sweep all pending approval records for a session to `timed_out`. **Payload:** - `session_id` (string, required) @@ -12,4 +12,5 @@ Sweep all pending approval records for a session to `timed_out` with reason `ses **Behavior:** - Only records with `status: "pending"` are flipped. - Non-pending records (already resolved, executed, denied, etc.) are left untouched. -- Intended to be called by the session worker or turn-orchestrator when a session is being deleted, so that pending approvals don't dangle forever. +- The flipped records carry no `Denial` — `status: "timed_out"` is self-describing per the Denial refactor. 
Callers that need to distinguish session-delete from run-stop sweeps should log that context in their own worker. +- Intended to be called by the session worker or turn-orchestrator when a session is being deleted or a run is stopped, so pending approvals don't dangle forever. diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 60cda0f9..84616f29 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -135,10 +135,6 @@ pub(crate) fn apply_policy_rules(rules: &rules::Ruleset, function_id: &str) -> P /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` -/// -/// `Legacy` is the read-time landing pad for records persisted before this -/// type existed (see [`migrate_legacy_record`]). New writes never emit it. #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -154,9 +150,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -596,67 +589,6 @@ pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option { Some(transition_record(rec, "timed_out", None, None, None)) } -/// Map a legacy approval record to the current shape. Covers two -/// generations of drift: -/// -/// 1. **Pre-trigger-model** (status `"allow"` / `"deny"`, free-form -/// `reason`): rewritten as `"executed"` / `"denied"`, with the old -/// `reason` folded into a [`Denial::Legacy`]. -/// 2. **Pre-Denial** (`decision_reason: ` field, no `denial`): the -/// string is moved into [`Denial::Legacy { reason }`] and the old key is -/// stripped so writers never resurface it. -/// -/// Returns `None` only when the record is already current. 
-pub fn migrate_legacy_record(rec: &Value) -> Option { - let status = rec.get("status").and_then(Value::as_str)?; - // Path 1: pre-trigger-model status rename. - let (new_status, denial_to_carry) = match status { - "allow" => ("executed", None), - "deny" => ( - "denied", - rec.get("reason") - .and_then(Value::as_str) - .map(|s| Denial::Legacy { - reason: s.to_string(), - }), - ), - _ => { - // Path 2: status already current, but the record may carry the - // pre-Denial `decision_reason` flat string. Lift it into Denial - // and strip the legacy key; otherwise return None. - let legacy_reason = rec - .get("decision_reason") - .and_then(Value::as_str) - .map(str::to_string); - let needs_lift = legacy_reason.is_some() && rec.get("denial").is_none(); - if !needs_lift { - return None; - } - let denial = Denial::Legacy { - reason: legacy_reason.expect("checked Some above"), - }; - let mut migrated = rec.clone(); - if let Some(obj) = migrated.as_object_mut() { - obj.remove("decision_reason"); - obj.insert( - "denial".into(), - serde_json::to_value(&denial).expect("Denial is always serializable"), - ); - obj.insert("legacy_migrated".into(), Value::Bool(true)); - } - return Some(migrated); - } - }; - let mut migrated = transition_record(rec, new_status, None, None, denial_to_carry); - if let Some(obj) = migrated.as_object_mut() { - // Strip the pre-trigger-model `reason` once it has been folded into - // `denial`; leaving it would create a dead field on the new shape. 
- obj.remove("reason"); - obj.insert("legacy_migrated".into(), Value::Bool(true)); - } - Some(migrated) -} - pub async fn handle_resolve( bus: &dyn StateBus, exec: &dyn FunctionExecutor, @@ -917,12 +849,7 @@ pub async fn handle_list_pending(bus: &dyn StateBus, state_scope: &str, payload: let all = bus.list_prefix(state_scope, &prefix).await; let pending: Vec = all .into_iter() - .filter(|v| { - if migrate_legacy_record(v).is_some() { - return false; - } - v.get("status").and_then(Value::as_str) == Some("pending") - }) + .filter(|v| v.get("status").and_then(Value::as_str) == Some("pending")) .collect(); json!({ "pending": pending }) } @@ -965,43 +892,14 @@ pub async fn handle_list_undelivered( let mut entries: Vec = Vec::new(); for rec in all { // Defensive scope: some bus backends ignore the prefix and return - // every record in `state_scope`. Filter by stamped `session_id`: - // - // - record has session_id matching ours → keep - // - record has session_id different from ours → drop - // - record lacks session_id AND is in "allow"/"deny" pre-trigger - // legacy form → keep (`migrate_legacy_record` below re-keys it - // under our session) - // - record lacks session_id AND is already terminal → drop - // (orphan from before session-id stamping; cannot be attributed) + // every record in `state_scope`. Drop anything not stamped with + // the session_id we're listing for. Orphan records lacking a + // session_id stamp are dropped (cannot be attributed); the + // migration path that used to recover them no longer exists. 
match rec.get("session_id").and_then(Value::as_str) { Some(sid) if sid == session_id => {} - Some(_) => continue, - None => { - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - if status != "allow" && status != "deny" { - continue; - } - } + _ => continue, } - let rec = if let Some(migrated) = migrate_legacy_record(&rec) { - let call_id = migrated - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if !call_id.is_empty() { - let _ = bus - .set( - state_scope, - &pending_key(session_id, call_id), - migrated.clone(), - ) - .await; - } - migrated - } else { - rec - }; let rec = if let Some(flipped) = maybe_flip_timed_out(&rec, now_ms) { let call_id = flipped .get("function_call_id") @@ -1186,10 +1084,10 @@ pub async fn handle_flush_delivered(bus: &dyn StateBus, state_scope: &str, paylo /// Sweep all still-pending approvals for a session to timed_out. /// -/// Reason defaults to `"session_deleted"` (legacy callers) but can be -/// overridden via the `reason` payload field — `run::stop` passes -/// `"run_stopped"` so consumers can distinguish a manual abort from a -/// session delete. +/// The `timed_out` status is self-describing per the Denial refactor — +/// callers no longer pass (or get back) a reason string. If you need to +/// distinguish *why* a session was swept (delete vs. abort vs. timeout), +/// the calling worker already has that context and should log it there. pub async fn handle_sweep_session(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { let session_id = payload .get("session_id") @@ -1198,14 +1096,6 @@ pub async fn handle_sweep_session(bus: &dyn StateBus, state_scope: &str, payload if session_id.is_empty() { return json!({ "ok": false, "error": "missing_session_id", "swept": 0 }); } - // The optional `reason` payload field used to be persisted as - // `decision_reason` on the resulting record. 
With Denial now the only - // structured reason channel and timed_out carrying no denial, the - // sweep_session reason is informational for the caller only — we log - // it but do not stamp it on the record. - if let Some(r) = payload.get("reason").and_then(Value::as_str) { - tracing::info!(session_id, reason = r, "approval-gate: sweep_session"); - } let prefix = format!("{session_id}/"); let all = bus.list_prefix(state_scope, &prefix).await; let mut swept = 0_u64; @@ -1263,13 +1153,15 @@ async fn write_event(iii: &III, session_id: &str, event: &Value) { /// Build the `approval_resolved` event a sweeper emits when it auto-flips an /// expired pending record. Pure — caller pumps the result onto the stream. fn timeout_resolved_event(function_call_id: &str) -> Value { + // Timed-out approvals carry no Denial — the `status: "timed_out"` is + // self-describing per the Denial refactor. Consumers (turn-orchestrator + // stitching, UIs) render the timeout from the status alone. json!({ "type": "approval_resolved", "function_call_id": function_call_id, "tool_call_id": function_call_id, "decision": "deny", "status": "timed_out", - "decision_reason": "timeout", }) } @@ -2435,85 +2327,6 @@ mod tests { assert_eq!(rec["status"], "timed_out"); } - #[test] - fn migrate_legacy_record_maps_allow_to_executed_without_result() { - let legacy = json!({ - "function_call_id": "c1", - "function_id": "shell::fs::write", - "args": {}, - "status": "allow", - "expires_at": 1_000_u64, - }); - let migrated = migrate_legacy_record(&legacy).expect("migrates"); - assert_eq!(migrated["status"], "executed"); - assert!( - migrated["result"].is_null() - || migrated.get("result").is_none() - || migrated["result"] == json!(null) - ); - assert_eq!(migrated["legacy_migrated"], json!(true)); - } - - #[test] - fn migrate_legacy_record_maps_deny_to_denied_with_legacy_denial() { - let legacy = json!({ - "function_call_id": "c1", - "status": "deny", - "reason": "manual", - "expires_at": 1_000_u64, - }); - let 
migrated = migrate_legacy_record(&legacy).expect("migrates"); - assert_eq!(migrated["status"], "denied"); - assert_eq!(migrated["denial"]["kind"], "legacy"); - assert_eq!(migrated["denial"]["detail"]["reason"], "manual"); - assert_eq!(migrated["legacy_migrated"], json!(true)); - assert!( - migrated.get("decision_reason").is_none(), - "legacy decision_reason must be stripped: {migrated}" - ); - assert!( - migrated.get("reason").is_none(), - "pre-trigger-model `reason` must be stripped after lifting into denial: {migrated}" - ); - } - - #[test] - fn migrate_legacy_record_lifts_decision_reason_when_status_is_already_current() { - // Pre-Denial records carry the flat `decision_reason: ` — - // migration lifts it into Denial::Legacy and strips the old field. - let legacy = json!({ - "function_call_id": "c1", - "status": "denied", - "decision_reason": "user typed nope", - "expires_at": 1_000_u64, - }); - let migrated = migrate_legacy_record(&legacy).expect("should lift"); - assert_eq!(migrated["status"], "denied"); - assert_eq!(migrated["denial"]["kind"], "legacy"); - assert_eq!(migrated["denial"]["detail"]["reason"], "user typed nope"); - assert!(migrated.get("decision_reason").is_none()); - assert_eq!(migrated["legacy_migrated"], json!(true)); - } - - #[test] - fn migrate_legacy_record_returns_none_for_new_status_strings() { - for new_status in [ - "pending", - "executed", - "failed", - "denied", - "timed_out", - "approved", - ] { - let rec = json!({"status": new_status}); - assert!( - migrate_legacy_record(&rec).is_none(), - "should not migrate already-new status '{}'", - new_status - ); - } - } - #[test] fn fn_constants_match_spec_strings() { assert_eq!(FN_RESOLVE, "approval::resolve"); @@ -3712,7 +3525,7 @@ mod tests { } #[tokio::test] - async fn handle_sweep_session_flips_pending_records_to_timed_out_with_reason_session_deleted() { + async fn handle_sweep_session_flips_pending_records_to_timed_out() { let bus = InMemoryStateBus::new(); bus.set( STATE_SCOPE, @@ 
-3731,12 +3544,39 @@ mod tests { .unwrap(); assert_eq!(rec["status"], "timed_out"); // sweep_session no longer stamps a reason string — timed_out is - // self-describing and the sweep cause is logged at info, not - // surfaced on the record. + // self-describing per the Denial refactor. assert!(rec.get("denial").is_none()); assert!(rec.get("decision_reason").is_none()); } + #[tokio::test] + async fn handle_sweep_session_ignores_legacy_reason_payload_field() { + // Old callers may still pass `reason` — approval-gate accepts the + // payload but does not persist it. Behavior is identical to a + // bare {session_id} payload. + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + let resp = handle_sweep_session( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "reason": "run_stopped"}), + ) + .await; + assert_eq!(resp["swept"], json!(1)); + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["status"], "timed_out"); + assert!(rec.get("denial").is_none()); + } + #[tokio::test] async fn handle_sweep_session_skips_non_pending_records() { let bus = InMemoryStateBus::new(); @@ -3888,7 +3728,9 @@ mod tests { assert_eq!(evt["tool_call_id"], "tc-1"); assert_eq!(evt["decision"], "deny"); assert_eq!(evt["status"], "timed_out"); - assert_eq!(evt["decision_reason"], "timeout"); + // timed_out is self-describing — no Denial / no legacy reason. + assert!(evt.get("decision_reason").is_none()); + assert!(evt.get("denial").is_none()); } #[test] @@ -4221,37 +4063,6 @@ mod tests { ); } - #[tokio::test] - async fn handle_list_undelivered_persists_migrated_legacy_record() { - // mutant L614 — `delete !` on the `if !call_id.is_empty()` guard. - // The legacy migration block writes the migrated row back to state - // so subsequent reads use the new shape. 
The mutant inverts the - // guard, suppressing the write. Verify the write happens. - let bus = InMemoryStateBus::new(); - // Pre-trigger-model row: status="allow" (legacy form). - let legacy = json!({ - "function_call_id": "c1", - "function_id": "shell::fs::write", - "args": {}, - "status": "allow", - "expires_at": 1_000u64, - }); - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), legacy) - .await - .unwrap(); - - let _ = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 5_000).await; - - // Storage now reflects the migrated shape. - let stored = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .expect("migrated row persisted"); - assert_eq!(stored["status"], json!("executed")); - assert_eq!(stored["legacy_migrated"], json!(true)); - } - #[test] fn maybe_flip_timed_out_flips_at_exact_expires_at() { // mutant L439: `<` → `<=` would not flip at the exact boundary. diff --git a/harness/crates/harness-types/src/agent_event.rs b/harness/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/harness/crates/harness-types/src/agent_event.rs +++ b/harness/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. 
diff --git a/provider-anthropic/crates/harness-types/src/agent_event.rs b/provider-anthropic/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/provider-anthropic/crates/harness-types/src/agent_event.rs +++ b/provider-anthropic/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. diff --git a/provider-openai/crates/harness-types/src/agent_event.rs b/provider-openai/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/provider-openai/crates/harness-types/src/agent_event.rs +++ b/provider-openai/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. 
diff --git a/provider-router/crates/harness-types/src/agent_event.rs b/provider-router/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/provider-router/crates/harness-types/src/agent_event.rs +++ b/provider-router/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. diff --git a/session/crates/harness-types/src/agent_event.rs b/session/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/session/crates/harness-types/src/agent_event.rs +++ b/session/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. 
diff --git a/turn-orchestrator/crates/harness-types/src/agent_event.rs b/turn-orchestrator/crates/harness-types/src/agent_event.rs index 6b37f381..f87ad305 100644 --- a/turn-orchestrator/crates/harness-types/src/agent_event.rs +++ b/turn-orchestrator/crates/harness-types/src/agent_event.rs @@ -23,7 +23,6 @@ pub enum ApprovalDecision { /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` -/// `{ "kind": "legacy", "detail": { "reason": "..." } }` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { @@ -39,9 +38,6 @@ pub enum Denial { phase: String, error: String, }, - Legacy { - reason: String, - }, } /// Stable wire format emitted by the loop on `agent::events/`. diff --git a/turn-orchestrator/src/run_stop.rs b/turn-orchestrator/src/run_stop.rs new file mode 100644 index 00000000..5948abf9 --- /dev/null +++ b/turn-orchestrator/src/run_stop.rs @@ -0,0 +1,136 @@ +//! `run::stop` — cooperative stop entrypoint. +//! +//! Thin wrapper that composes three existing primitives: +//! +//! 1. `router::abort` writes `session//abort_signal = true`. The +//! durable state handlers (`states/steering`, `states/assistant`, +//! `states/functions`) read this flag at every transition and route +//! the run to `TearingDown`. +//! 2. `approval::sweep_session` marks every pending approval row for the +//! session as `timed_out`, so a previously queued approval can no +//! longer dispatch the inner function after the user clicked stop. +//! 3. `iii::durable::publish turn::step_requested` wakes the subscriber so +//! it can pick the new state up immediately instead of waiting for the +//! next natural tick. +//! +//! The flag write itself is delegated rather than reimplemented so the +//! 
durable state machine and `router::abort` (called from the legacy +//! HTTP `agent/{session_id}/abort` endpoint) cannot drift. + +use iii_sdk::{IIIError, RegisterFunctionMessage, TriggerRequest, Value, III}; +use serde_json::json; + +use crate::persistence; +use crate::run_start; +use crate::state::TurnState; + +pub const FUNCTION_ID: &str = "run::stop"; + +pub async fn execute(iii: III, payload: Value) -> Result { + let session_id = required_str(&payload, "session_id")?; + + let prior = persistence::load_record(&iii, &session_id).await; + let prior_state = prior.as_ref().map(|r| r.state); + + // Idempotency: no record → nothing to stop. + if prior.is_none() { + tracing::info!(%session_id, "run::stop: no record"); + return Ok(json!({ + "session_id": session_id, + "accepted": false, + "reason": "no_record", + })); + } + if prior_state == Some(TurnState::Stopped) { + tracing::info!(%session_id, "run::stop: already stopped"); + return Ok(json!({ + "session_id": session_id, + "accepted": false, + "reason": "already_stopped", + "prior_state": "stopped", + })); + } + + // 1) Flag write via existing primitive. + if let Err(e) = iii + .trigger(TriggerRequest { + function_id: "router::abort".into(), + payload: json!({ "session_id": session_id }), + action: None, + timeout_ms: None, + }) + .await + { + tracing::warn!(error = %e, %session_id, "run::stop: router::abort failed"); + } + + // 2) Sweep pending approvals so no queued approval can still dispatch + // its inner function after the user clicked stop. The cause is + // logged here — approval-gate no longer persists a reason string + // on the swept records (status: "timed_out" is self-describing). 
+ if let Err(e) = iii + .trigger(TriggerRequest { + function_id: "approval::sweep_session".into(), + payload: json!({ "session_id": session_id }), + action: None, + timeout_ms: None, + }) + .await + { + tracing::warn!(error = %e, %session_id, "run::stop: approval::sweep_session failed"); + } else { + tracing::info!(%session_id, cause = "run_stopped", "run::stop: swept pending approvals"); + } + + // 3) Nudge the durable subscriber so the abort branch fires immediately. + run_start::publish_step(&iii, &session_id).await; + + tracing::info!( + %session_id, + prior_state = ?prior_state, + "run::stop accepted", + ); + + Ok(json!({ + "session_id": session_id, + "accepted": true, + "prior_state": prior_state.map(|s| s.as_str()), + })) +} + +pub fn register(iii: &III) { + let iii_async = iii.clone(); + iii.register_function(( + RegisterFunctionMessage::with_id(FUNCTION_ID.to_string()).with_description( + "Cooperatively stop an in-flight run: raise the abort flag, sweep \ + pending approvals, and nudge the durable subscriber. Idempotent; \ + returns {accepted: false, reason} when the session is unknown or \ + already stopped." 
+ .to_string(), + ), + move |payload: Value| { + let iii = iii_async.clone(); + async move { execute(iii, payload).await } + }, + )); +} + +fn required_str(payload: &Value, field: &str) -> Result { + payload + .get(field) + .and_then(Value::as_str) + .map(str::to_string) + .ok_or_else(|| IIIError::Handler(format!("missing required field: {field}"))) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn required_str_extracts_or_errors() { + let p = json!({ "session_id": "abc" }); + assert_eq!(required_str(&p, "session_id").unwrap(), "abc"); + assert!(required_str(&p, "missing").is_err()); + } +} diff --git a/turn-orchestrator/src/states/approval_stitching.rs b/turn-orchestrator/src/states/approval_stitching.rs index 05250219..848c274b 100644 --- a/turn-orchestrator/src/states/approval_stitching.rs +++ b/turn-orchestrator/src/states/approval_stitching.rs @@ -50,7 +50,6 @@ pub fn omission_summary_message(omitted: u64) -> Option { /// (the high-value variant — model gets actionable correction) /// - `state_error` → "gate state-write failure (phase=<p>): <e>" /// (fail-closed signal; operator-facing) -/// - `legacy` → "" (pass-through for migrated pre-Denial records) fn render_denial_lines(denial: &Value) -> Vec { let kind = denial.get("kind").and_then(Value::as_str).unwrap_or(""); let detail = denial.get("detail").cloned().unwrap_or(Value::Null); @@ -78,10 +77,6 @@ fn render_denial_lines(denial: &Value) -> Vec { " approval gate state-write failure (phase={phase}): {error}" )] } - "legacy" => { - let reason = detail.get("reason").and_then(Value::as_str).unwrap_or(""); - vec![format!(" reason: {reason}")] - } other => vec![format!(" denial kind: {other}")], } } @@ -122,9 +117,6 @@ fn stitch_one(entry: &Value) -> String { lines.extend(render_denial_lines(denial)); } } - if entry.get("legacy_migrated").and_then(Value::as_bool) == Some(true) { - lines.push(" note: legacy record migrated from pre-trigger-model gate; original result was delivered in-band when the call was made.".into()); - } lines.join("\n") } @@ -280,23 +272,6 @@ mod tests { assert!(msg.contains("kv unavailable")); } - #[test] - fn stitch_entries_denied_legacy_passes_through_reason() { - let entries = vec![make_entry( - "c1", - "shell::fs::write", - "denied", - json!({ - "denial": { - "kind": "legacy", - "detail": { "reason": "user typed nope" } - } - }), - )]; - let msg = &stitch_entries(&entries)[0]; - assert!(msg.contains("reason: user typed nope")); - } - #[test] fn stitch_entries_timed_out_omits_denial_block() { let entries = vec![make_entry( diff --git a/turn-orchestrator/tests/run_stop.rs b/turn-orchestrator/tests/run_stop.rs new file mode 100644 index 00000000..d0251041 --- /dev/null +++ b/turn-orchestrator/tests/run_stop.rs @@ -0,0 +1,287 @@ +//! `run::stop` lifecycle: idempotency, primitives invoked, payload validation. +//! +//! Boots a minimal iii engine + session worker. The state-machine-driven +//! cases (stop-during-streaming, stop-during-execute) require a fully +//! 
orchestrated turn loop with provider/sandbox mocks and are exercised +//! end-to-end during manual QA per the plan's Verification section. +//! Here we cover the contract `run::stop` directly exposes. + +mod common; + +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use iii_sdk::{IIIError, RegisterFunctionMessage, TriggerRequest}; +use serde_json::{json, Value}; +use serial_test::serial; +use tokio::time::timeout; + +use common::Harness; +use turn_orchestrator::{run_stop, TurnState, TurnStateRecord}; + +const STATE_SCOPE: &str = "agent"; + +/// Sink for `router::abort` and `approval::sweep_session` invocations. +#[derive(Default, Clone)] +struct Sink { + abort_calls: Arc>>, + sweep_calls: Arc>>, + step_publishes: Arc>>, +} + +impl Sink { + fn new() -> Self { + Self::default() + } + fn abort(&self) -> Vec { + self.abort_calls.lock().unwrap().clone() + } + fn sweep(&self) -> Vec { + self.sweep_calls.lock().unwrap().clone() + } +} + +async fn register_primitives(iii: &iii_sdk::III, sink: &Sink) { + let abort_log = sink.abort_calls.clone(); + iii.register_function(( + RegisterFunctionMessage::with_id("router::abort".to_string()) + .with_description("test sink: capture run::stop's abort flag write".into()), + move |payload: Value| { + let log = abort_log.clone(); + async move { + log.lock().unwrap().push(payload); + Ok::<_, IIIError>(json!({ "ok": true })) + } + }, + )); + + let sweep_log = sink.sweep_calls.clone(); + iii.register_function(( + RegisterFunctionMessage::with_id("approval::sweep_session".to_string()) + .with_description("test sink: capture approval sweep".into()), + move |payload: Value| { + let log = sweep_log.clone(); + async move { + log.lock().unwrap().push(payload); + Ok::<_, IIIError>(json!({ "ok": true, "swept": 0 })) + } + }, + )); + + let step_log = sink.step_publishes.clone(); + iii.register_function(( + RegisterFunctionMessage::with_id("iii::durable::publish".to_string()) + .with_description("test sink: capture turn::step 
publishes".into()), + move |payload: Value| { + let log = step_log.clone(); + async move { + log.lock().unwrap().push(payload); + Ok::<_, IIIError>(json!({ "ok": true })) + } + }, + )); + + // Allow registrations to settle on the engine. 200ms is not always enough + // when the engine is busy; 800ms matches what `Harness::boot` already + // waits for the session worker. + tokio::time::sleep(Duration::from_millis(800)).await; + + // Sanity-probe each registration so a slow engine doesn't leave us + // chasing a phantom failure later. If any probe errors, fail loudly. + for fn_id in ["router::abort", "approval::sweep_session"] { + iii.trigger(TriggerRequest { + function_id: fn_id.into(), + payload: json!({ "session_id": "__probe__" }), + action: None, + timeout_ms: Some(2_000), + }) + .await + .unwrap_or_else(|e| panic!("test sink {fn_id} not reachable: {e}")); + } + + // Probes count as calls — wipe the sink before tests assert behavior. + sink.abort_calls.lock().unwrap().clear(); + sink.sweep_calls.lock().unwrap().clear(); + sink.step_publishes.lock().unwrap().clear(); +} + +async fn seed_record(iii: &iii_sdk::III, session_id: &str, state: TurnState) { + let mut rec = TurnStateRecord::new(session_id, None); + rec.transition_to(state); + let value = serde_json::to_value(&rec).expect("serialize record"); + let _ = iii + .trigger(TriggerRequest { + function_id: "state::set".into(), + payload: json!({ + "scope": STATE_SCOPE, + "key": turn_orchestrator::turn_state_key(session_id), + "value": value, + }), + action: None, + timeout_ms: Some(2_000), + }) + .await; +} + +async fn call_run_stop(iii: &iii_sdk::III, session_id: &str) -> Result { + iii.trigger(TriggerRequest { + function_id: run_stop::FUNCTION_ID.into(), + payload: json!({ "session_id": session_id }), + action: None, + timeout_ms: Some(5_000), + }) + .await +} + +#[tokio::test] +#[serial] +async fn run_stop_requires_session_id() { + let Some(h) = Harness::boot().await else { + return; + }; + 
run_stop::register(&h.iii); + tokio::time::sleep(Duration::from_millis(200)).await; + + let r = h + .iii + .trigger(TriggerRequest { + function_id: run_stop::FUNCTION_ID.into(), + payload: json!({}), + action: None, + timeout_ms: Some(2_000), + }) + .await; + assert!( + r.is_err(), + "expected handler error for missing session_id, got {:?}", + r + ); +} + +#[tokio::test] +#[serial] +async fn run_stop_returns_no_record_for_unknown_session() { + let Some(h) = Harness::boot().await else { + return; + }; + let sink = Sink::new(); + register_primitives(&h.iii, &sink).await; + run_stop::register(&h.iii); + tokio::time::sleep(Duration::from_millis(200)).await; + + let session_id = format!("nonexistent-{}", common::nonce()); + let resp = timeout(Duration::from_secs(3), call_run_stop(&h.iii, &session_id)) + .await + .expect("run::stop responded in time") + .expect("run::stop succeeded"); + + assert_eq!(resp["accepted"], json!(false)); + assert_eq!(resp["reason"], json!("no_record")); + assert!( + sink.abort().is_empty(), + "router::abort should not be invoked for unknown session" + ); + assert!( + sink.sweep().is_empty(), + "approval::sweep_session should not be invoked for unknown session" + ); +} + +#[tokio::test] +#[serial] +async fn run_stop_short_circuits_when_already_stopped() { + let Some(h) = Harness::boot().await else { + return; + }; + let sink = Sink::new(); + register_primitives(&h.iii, &sink).await; + run_stop::register(&h.iii); + tokio::time::sleep(Duration::from_millis(200)).await; + + let session_id = format!("already-stopped-{}", common::nonce()); + seed_record(&h.iii, &session_id, TurnState::Stopped).await; + + let resp = timeout(Duration::from_secs(3), call_run_stop(&h.iii, &session_id)) + .await + .expect("run::stop responded in time") + .expect("run::stop succeeded"); + + assert_eq!(resp["accepted"], json!(false)); + assert_eq!(resp["reason"], json!("already_stopped")); + assert!( + sink.abort().is_empty(), + "router::abort should not be invoked when 
already stopped" + ); + assert!( + sink.sweep().is_empty(), + "approval::sweep_session should not be invoked when already stopped" + ); +} + +#[tokio::test] +#[serial] +async fn run_stop_invokes_all_primitives_on_happy_path() { + let Some(h) = Harness::boot().await else { + return; + }; + let sink = Sink::new(); + register_primitives(&h.iii, &sink).await; + run_stop::register(&h.iii); + tokio::time::sleep(Duration::from_millis(200)).await; + + let session_id = format!("happy-{}", common::nonce()); + seed_record(&h.iii, &session_id, TurnState::AwaitingAssistant).await; + + let resp = timeout(Duration::from_secs(3), call_run_stop(&h.iii, &session_id)) + .await + .expect("run::stop responded in time") + .expect("run::stop succeeded"); + + assert_eq!(resp["accepted"], json!(true)); + assert_eq!(resp["prior_state"], json!("awaiting_assistant")); + + let abort_calls = sink.abort(); + assert_eq!(abort_calls.len(), 1, "router::abort invoked once"); + assert_eq!(abort_calls[0]["session_id"], json!(session_id)); + + let sweep_calls = sink.sweep(); + assert_eq!(sweep_calls.len(), 1, "approval::sweep_session invoked once"); + assert_eq!(sweep_calls[0]["session_id"], json!(session_id)); + // The legacy `reason` payload field is gone — approval-gate's + // timed_out status is self-describing. run::stop now logs the + // cause locally instead of forwarding it on the wire. 
+ assert!( + sweep_calls[0].get("reason").is_none(), + "approval::sweep_session payload must not carry a reason: {:?}", + sweep_calls[0] + ); +} + +#[tokio::test] +#[serial] +async fn run_stop_is_idempotent_when_repeated() { + let Some(h) = Harness::boot().await else { + return; + }; + let sink = Sink::new(); + register_primitives(&h.iii, &sink).await; + run_stop::register(&h.iii); + tokio::time::sleep(Duration::from_millis(200)).await; + + let session_id = format!("double-{}", common::nonce()); + seed_record(&h.iii, &session_id, TurnState::AwaitingAssistant).await; + + let r1 = call_run_stop(&h.iii, &session_id).await.unwrap(); + assert_eq!(r1["accepted"], json!(true)); + + // Simulate the orchestrator's teardown by manually transitioning to Stopped. + seed_record(&h.iii, &session_id, TurnState::Stopped).await; + + let r2 = call_run_stop(&h.iii, &session_id).await.unwrap(); + assert_eq!(r2["accepted"], json!(false)); + assert_eq!(r2["reason"], json!("already_stopped")); + + // Only the first call should have invoked the primitives. + assert_eq!(sink.abort().len(), 1); + assert_eq!(sink.sweep().len(), 1); +} From 025aac67131e056aae45d0e3429515a8bc59432a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:34:53 -0300 Subject: [PATCH 04/30] feat(approval-gate): add typed Record, Status, Next schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New src/record.rs defines the persisted approval record as a typed struct alongside its lifecycle Status enum and a Next enum that pairs each target status with its outcome data. Wire format is byte-identical to the existing serde_json::Value blobs — operators can adopt the typed shape incrementally without changing any persisted state. approval-gate - New module record: - Status enum (Pending / Approved / Executed / Failed / Denied / TimedOut), serde snake_case, with is_terminal() helper. 
- Record struct mirroring the historical JSON keys field-for-field, optional outcome fields scoped to terminal statuses, serde skip-when-None so existing rows round-trip cleanly. - Next enum that bundles (target_status, outcome_payload) so the type system rules out impossible transitions (e.g. Executed without a result, Denied without a Denial). Replaces the old (status_string, Option, Option, Option) parallel-Option signature when callers opt in. - Record::to_value / Record::from_value bridge to the Value blobs the iii state bus still expects. - 8 unit tests covering serde round-trips, status semantics, expiry saturation, forward-compat with unknown fields. - lib.rs re-exports Next, Record, Status. Handler migration to the typed schema is a separate change; this commit just makes the types available so future work can lift Value access into typed field access incrementally rather than in one big diff. --- approval-gate/src/lib.rs | 2 + approval-gate/src/record.rs | 293 ++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 approval-gate/src/record.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 84616f29..a64d69f1 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -4,9 +4,11 @@ pub mod config; pub mod manifest; +pub mod record; pub mod rules; pub use config::{InterceptorRule, WorkerConfig}; +pub use record::{Next, Record, Status}; use std::sync::{Arc, RwLock}; diff --git a/approval-gate/src/record.rs b/approval-gate/src/record.rs new file mode 100644 index 00000000..ed8d0400 --- /dev/null +++ b/approval-gate/src/record.rs @@ -0,0 +1,293 @@ +//! Typed approval record schema. +//! +//! Replaces the ad-hoc `serde_json::Value` blobs that handlers used to +//! pass around. The Rust shape mirrors the persisted JSON exactly via +//! serde — wire compatibility is the contract, this is just the in-process +//! representation. +//! +//! ## Lifecycle +//! +//! ```text +//! 
intercept → Pending ──user allow──> Approved ──invoke──> Executed +//! │ └invoke-err──> Failed +//! ├──user deny──> Denied +//! └──expires──> TimedOut +//! ``` +//! +//! Outcome data (result / error / denial) is required on the terminal +//! status it belongs to and meaningless elsewhere. [`Next`] enforces this +//! at the type level so transitions can't be miscalled. + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::Denial; + +/// Lifecycle status of an approval record. Wire format is snake_case so +/// it stays human-readable in iii-state dumps and audit logs. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Status { + Pending, + Approved, + Executed, + Failed, + Denied, + TimedOut, +} + +impl Status { + /// `true` for statuses that represent a final outcome — anything + /// stitchable into the LLM's next turn. `Pending` and `Approved` are + /// intermediate; the rest are terminal. + pub fn is_terminal(self) -> bool { + matches!( + self, + Status::Executed | Status::Failed | Status::Denied | Status::TimedOut + ) + } +} + +/// Persisted approval record. Wire-compatible with the historical +/// JSON shape — every field uses the same key/type the previous +/// `serde_json::Value`-based code emitted, so existing iii-state +/// rows deserialize cleanly. +/// +/// Optional fields are scoped to particular statuses (e.g. `result` +/// only when `status == Executed`); the type itself does not enforce +/// that pairing — [`Next`] does, at the transition boundary. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Record { + pub function_call_id: String, + pub function_id: String, + pub args: Value, + pub status: Status, + pub expires_at: u64, + + /// Stamped by `handle_intercept` after the pending record is built so + /// the timeout sweeper can address the right session stream. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_id: Option, + + /// Unix ms of the first transition into a terminal status. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub resolved_at: Option, + + /// Function output. Present iff `status == Executed`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub result: Option, + + /// Function error string. Present iff `status == Failed`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub error: Option, + + /// Structured deny payload. Present iff `status == Denied`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub denial: Option, + + /// Set when `approval::ack_delivered` stamps the record with the turn + /// id that surfaced it to the LLM. Records without this stamp surface + /// again on subsequent `approval::list_undelivered` calls. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub delivered_in_turn_id: Option, +} + +impl Record { + /// Construct a fresh pending record. `session_id` is unset here — + /// `handle_intercept` stamps it before persisting. `expires_at` is + /// `now_ms + timeout_ms`, saturating on overflow so a malicious + /// or buggy caller can't wrap the deadline around into the past. + pub fn new_pending( + function_call_id: String, + function_id: String, + args: Value, + now_ms: u64, + timeout_ms: u64, + ) -> Self { + Self { + function_call_id, + function_id, + args, + status: Status::Pending, + expires_at: now_ms.saturating_add(timeout_ms), + session_id: None, + resolved_at: None, + result: None, + error: None, + denial: None, + delivered_in_turn_id: None, + } + } + + /// Serialize to the wire JSON shape. Infallible — `serde_json::to_value` + /// on a struct with only serializable fields cannot fail at runtime. + pub fn to_value(&self) -> Value { + serde_json::to_value(self).expect("Record is always serializable") + } + + /// Deserialize from the wire JSON shape. 
Returns `None` if the value + /// doesn't match the schema (missing required fields, bad status enum, + /// etc.) — handlers treat that as "skip this record" rather than + /// crashing on corrupt state. + pub fn from_value(v: Value) -> Option { + serde_json::from_value(v).ok() + } +} + +/// What `transition_record` should change. Each variant pairs the target +/// [`Status`] with whatever outcome data that status carries. The type +/// system makes invalid combinations unrepresentable: you can't ask for +/// `Executed` without providing a result, or for `Denied` without a +/// `Denial`. `Approved` is an intermediate status carrying no outcome — +/// it exists so the bus can observe the post-allow / pre-invoke state. +#[derive(Debug, Clone)] +pub enum Next { + Approved, + Executed { result: Value }, + Failed { error: String }, + Denied { denial: Denial }, + TimedOut, +} + +impl Next { + /// The target status this transition moves the record to. + pub fn status(&self) -> Status { + match self { + Next::Approved => Status::Approved, + Next::Executed { .. } => Status::Executed, + Next::Failed { .. } => Status::Failed, + Next::Denied { .. 
} => Status::Denied, + Next::TimedOut => Status::TimedOut, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn status_is_terminal_matches_lifecycle() { + assert!(!Status::Pending.is_terminal()); + assert!(!Status::Approved.is_terminal()); + assert!(Status::Executed.is_terminal()); + assert!(Status::Failed.is_terminal()); + assert!(Status::Denied.is_terminal()); + assert!(Status::TimedOut.is_terminal()); + } + + #[test] + fn status_serializes_as_snake_case_string() { + assert_eq!(serde_json::to_value(Status::Pending).unwrap(), json!("pending")); + assert_eq!(serde_json::to_value(Status::Approved).unwrap(), json!("approved")); + assert_eq!(serde_json::to_value(Status::Executed).unwrap(), json!("executed")); + assert_eq!(serde_json::to_value(Status::Failed).unwrap(), json!("failed")); + assert_eq!(serde_json::to_value(Status::Denied).unwrap(), json!("denied")); + assert_eq!(serde_json::to_value(Status::TimedOut).unwrap(), json!("timed_out")); + } + + #[test] + fn status_round_trips_via_json() { + for s in [ + Status::Pending, + Status::Approved, + Status::Executed, + Status::Failed, + Status::Denied, + Status::TimedOut, + ] { + let v = serde_json::to_value(s).unwrap(); + let back: Status = serde_json::from_value(v).unwrap(); + assert_eq!(back, s); + } + } + + #[test] + fn record_pending_round_trips() { + let rec = Record::new_pending( + "c1".into(), + "shell::exec".into(), + json!({"command": "ls"}), + 1_000, + 60_000, + ); + let v = serde_json::to_value(&rec).unwrap(); + assert_eq!(v["function_call_id"], "c1"); + assert_eq!(v["function_id"], "shell::exec"); + assert_eq!(v["status"], "pending"); + assert_eq!(v["expires_at"], 61_000); + // Optional fields are omitted when None. 
+ assert!(v.as_object().unwrap().get("session_id").is_none()); + assert!(v.as_object().unwrap().get("denial").is_none()); + let back: Record = serde_json::from_value(v).unwrap(); + assert_eq!(back, rec); + } + + #[test] + fn record_with_optional_fields_round_trips() { + let rec = Record { + function_call_id: "c1".into(), + function_id: "shell::exec".into(), + args: json!({}), + status: Status::Executed, + expires_at: 60_000, + session_id: Some("s1".into()), + resolved_at: Some(5_000), + result: Some(json!({"ok": true})), + error: None, + denial: None, + delivered_in_turn_id: Some("turn-X".into()), + }; + let v = serde_json::to_value(&rec).unwrap(); + let back: Record = serde_json::from_value(v).unwrap(); + assert_eq!(back, rec); + } + + #[test] + fn record_pending_expires_at_saturates_on_overflow() { + let rec = Record::new_pending( + "c1".into(), + "f".into(), + json!({}), + u64::MAX - 5, + 100, + ); + assert_eq!(rec.expires_at, u64::MAX); + } + + #[test] + fn record_deserializes_from_wire_with_unknown_extra_fields() { + // Forward-compat: unknown fields are silently ignored so a worker + // can read a record written by a newer worker version without + // crashing on schema additions it doesn't know about yet. 
+ let v = json!({ + "function_call_id": "c1", + "function_id": "f", + "args": {}, + "status": "pending", + "expires_at": 1000, + "future_field": "some new thing", + }); + let rec: Record = serde_json::from_value(v).unwrap(); + assert_eq!(rec.status, Status::Pending); + } + + #[test] + fn next_status_pairing_is_correct() { + assert_eq!(Next::Approved.status(), Status::Approved); + assert_eq!( + Next::Executed { result: json!({"ok": true}) }.status(), + Status::Executed + ); + assert_eq!( + Next::Failed { error: "EACCES".into() }.status(), + Status::Failed + ); + assert_eq!( + Next::Denied { denial: Denial::UserRejected }.status(), + Status::Denied + ); + assert_eq!(Next::TimedOut.status(), Status::TimedOut); + } +} From 61f5ceabe026dce491506e84dee0f057af80f73a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:42:05 -0300 Subject: [PATCH 05/30] refactor(approval-gate): extract wire.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of breaking lib.rs (~4300 lines) into focused modules. This commit lifts the wire-format types and small wire-shape helpers into their own module — pure data shapes with no I/O, no iii-sdk dependency, no async — so downstream workers that only need to understand the approval-gate protocol can depend on a small surface. Moved (lib.rs -> wire.rs): - Denial (structured deny payload) - IncomingCall + requires_approval() - Decision (Allow | Deny(Denial)) - WireDecision (allow|deny wire enum) - pending_key() - extract_call() - block_reply_for() All public items stay re-exported from the crate root, so every existing call site (handlers, tests, integration tests, downstream crates) continues to work without import changes. Pure move, no behavior change. 141 approval-gate tests pass; turn-orchestrator + harness + session + providers + harness-tui + shell all pass. 
--- approval-gate/src/lib.rs | 126 +------------------------------ approval-gate/src/wire.rs | 155 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 122 deletions(-) create mode 100644 approval-gate/src/wire.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index a64d69f1..a504162f 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -6,9 +6,13 @@ pub mod config; pub mod manifest; pub mod record; pub mod rules; +pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; pub use record::{Next, Record, Status}; +pub use wire::{ + block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, +}; use std::sync::{Arc, RwLock}; @@ -127,33 +131,6 @@ pub(crate) fn apply_policy_rules(rules: &rules::Ruleset, function_id: &str) -> P } } -/// Structured deny payload carried on wire replies, persisted records, and -/// `approval_resolved` stream events. Replaces the legacy free-form -/// `decision_reason` / `reason` strings so consumers (turn-orchestrator -/// stitching, UIs, the LLM) can branch on `kind` instead of parsing prose. -/// -/// Wire shape (serde tag=kind, content=detail, snake_case): -/// `{ "kind": "policy", "detail": { "classifier_reason": "...", "classifier_fn": "..." } }` -/// `{ "kind": "user_rejected", "detail": null }` -/// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` -/// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." 
} }` -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(tag = "kind", content = "detail", rename_all = "snake_case")] -pub enum Denial { - Policy { - classifier_reason: String, - classifier_fn: String, - }, - UserRejected, - UserCorrected { - feedback: String, - }, - StateError { - phase: String, - error: String, - }, -} - #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum ClassifierDecision { Auto, @@ -193,88 +170,6 @@ pub fn is_terminal_status(status: &str) -> bool { matches!(status, "executed" | "failed" | "denied" | "timed_out") } -#[derive(Debug, Clone, PartialEq)] -pub struct IncomingCall { - pub session_id: String, - pub function_call_id: String, - pub function_id: String, - pub args: Value, - pub approval_required: Vec, - pub event_id: String, - pub reply_stream: String, -} - -impl IncomingCall { - pub fn requires_approval(&self) -> bool { - self.approval_required - .iter() - .any(|n| n == &self.function_id) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Decision { - Allow, - Deny(Denial), -} - -/// Wire-format decision string used by `approval::resolve` and stored -/// as the `status` field of resolved approval records. -/// -/// Serializes / deserializes as `"allow"` or `"deny"`. -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum WireDecision { - Allow, - Deny, -} - -/// Build the state-store key for a pending approval entry. -/// -/// `session_id` and `function_call_id` must not contain `/`. They are caller-controlled -/// IDs minted by turn-orchestrator; today neither format uses the separator. 
-pub fn pending_key(session_id: &str, function_call_id: &str) -> String { - debug_assert!(!session_id.contains('/'), "session_id must not contain '/'"); - debug_assert!( - !function_call_id.contains('/'), - "function_call_id must not contain '/'" - ); - format!("{session_id}/{function_call_id}") -} - -pub fn extract_call(envelope: &Value) -> Option { - let event_id = envelope - .get("event_id") - .and_then(Value::as_str)? - .to_string(); - let reply_stream = envelope - .get("reply_stream") - .and_then(Value::as_str)? - .to_string(); - let inner = envelope.get("payload").unwrap_or(envelope); - let session_id = inner.get("session_id").and_then(Value::as_str)?.to_string(); - let fc = inner - .get("function_call") - .or_else(|| inner.get("tool_call"))?; - let function_id = fc - .get("function_id") - .or_else(|| fc.get("name")) - .and_then(Value::as_str)? - .to_string(); - Some(IncomingCall { - session_id, - function_call_id: fc.get("id").and_then(Value::as_str)?.to_string(), - function_id, - args: fc.get("arguments").cloned().unwrap_or_else(|| json!({})), - approval_required: inner - .get("approval_required") - .and_then(|v| serde_json::from_value(v.clone()).ok()) - .unwrap_or_default(), - event_id, - reply_stream, - }) -} - pub fn build_pending_record( function_call_id: &str, function_id: &str, @@ -342,19 +237,6 @@ pub fn transition_record_with_now( rec } -/// Build the hook block reply for a [`Decision`]. Deny replies carry the -/// structured [`Denial`] under `denial`; consumers (turn-orchestrator -/// stitching, UIs, the LLM) branch on `denial.kind` rather than parsing a -/// free-form `reason` string. 
-pub fn block_reply_for(decision: &Decision) -> Value { - match decision { - Decision::Allow => json!({ "block": false }), - Decision::Deny(denial) => json!({ - "block": true, - "denial": denial, - }), - } -} pub struct Refs { pub resolve: FunctionRef, diff --git a/approval-gate/src/wire.rs b/approval-gate/src/wire.rs new file mode 100644 index 00000000..7f540e5d --- /dev/null +++ b/approval-gate/src/wire.rs @@ -0,0 +1,155 @@ +//! Wire-format types for the approval gate. +//! +//! Pure data shapes and small wire-shape helpers — no I/O, no `iii_sdk` +//! deps, no async. Anything a downstream worker would need to +//! understand the approval-gate protocol lives here: +//! +//! - [`Denial`] — structured deny payload (`kind` + `detail`) carried on +//! hook replies, persisted records, and `approval_resolved` events. +//! - [`Decision`] — internal allow/deny choice; pairs `Deny` with its +//! [`Denial`] so the type system rules out structureless deny. +//! - [`WireDecision`] — coarse `"allow"` / `"deny"` enum used at the +//! `approval::resolve` RPC boundary, where the UI / orchestrator +//! doesn't yet know the full [`Denial`]. +//! - [`IncomingCall`] — parsed `agent::before_function_call` envelope. +//! - [`pending_key`], [`extract_call`], [`block_reply_for`] — pure +//! helpers for going to / from the wire. +//! +//! The handler crate re-exports the public items from [`crate`] so +//! existing call sites don't need to import the module directly. + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +/// Structured deny payload carried on wire replies, persisted records, and +/// `approval_resolved` stream events. Replaces the legacy free-form +/// `decision_reason` / `reason` strings so consumers (turn-orchestrator +/// stitching, UIs, the LLM) can branch on `kind` instead of parsing prose. +/// +/// Wire shape (serde tag=kind, content=detail, snake_case): +/// `{ "kind": "policy", "detail": { "classifier_reason": "...", "classifier_fn": "..." 
} }` +/// `{ "kind": "user_rejected", "detail": null }` +/// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` +/// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", content = "detail", rename_all = "snake_case")] +pub enum Denial { + Policy { + classifier_reason: String, + classifier_fn: String, + }, + UserRejected, + UserCorrected { + feedback: String, + }, + StateError { + phase: String, + error: String, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct IncomingCall { + pub session_id: String, + pub function_call_id: String, + pub function_id: String, + pub args: Value, + pub approval_required: Vec, + pub event_id: String, + pub reply_stream: String, +} + +impl IncomingCall { + pub fn requires_approval(&self) -> bool { + self.approval_required + .iter() + .any(|n| n == &self.function_id) + } +} + +/// Internal allow/deny choice. Paired with a structured [`Denial`] on +/// the `Deny` arm so callers that emit a wire reply can't accidentally +/// drop the deny reason on the floor. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Decision { + Allow, + Deny(Denial), +} + +/// Wire-format decision string used by `approval::resolve` and stored +/// as the `status` field of resolved approval records. +/// +/// Serializes / deserializes as `"allow"` or `"deny"`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum WireDecision { + Allow, + Deny, +} + +/// Build the state-store key for a pending approval entry. +/// +/// `session_id` and `function_call_id` must not contain `/`. They are caller-controlled +/// IDs minted by turn-orchestrator; today neither format uses the separator. 
+pub fn pending_key(session_id: &str, function_call_id: &str) -> String { + debug_assert!(!session_id.contains('/'), "session_id must not contain '/'"); + debug_assert!( + !function_call_id.contains('/'), + "function_call_id must not contain '/'" + ); + format!("{session_id}/{function_call_id}") +} + +/// Parse the `agent::before_function_call` envelope into the +/// [`IncomingCall`] the gate's intercept logic operates on. Accepts both +/// the modern `function_call` shape and the legacy `tool_call` alias so +/// older sessions in-flight at upgrade time keep working. +/// +/// Returns `None` if any required field is missing — handlers treat that +/// as "not our concern" and pass the envelope through. +pub fn extract_call(envelope: &Value) -> Option { + let event_id = envelope + .get("event_id") + .and_then(Value::as_str)? + .to_string(); + let reply_stream = envelope + .get("reply_stream") + .and_then(Value::as_str)? + .to_string(); + let inner = envelope.get("payload").unwrap_or(envelope); + let session_id = inner.get("session_id").and_then(Value::as_str)?.to_string(); + let fc = inner + .get("function_call") + .or_else(|| inner.get("tool_call"))?; + let function_id = fc + .get("function_id") + .or_else(|| fc.get("name")) + .and_then(Value::as_str)? + .to_string(); + Some(IncomingCall { + session_id, + function_call_id: fc.get("id").and_then(Value::as_str)?.to_string(), + function_id, + args: fc.get("arguments").cloned().unwrap_or_else(|| json!({})), + approval_required: inner + .get("approval_required") + .and_then(|v| serde_json::from_value(v.clone()).ok()) + .unwrap_or_default(), + event_id, + reply_stream, + }) +} + +/// Build the hook block reply for a [`Decision`]. Deny replies carry the +/// structured [`Denial`] under `denial`; consumers (turn-orchestrator +/// stitching, UIs, the LLM) branch on `denial.kind` rather than parsing a +/// free-form `reason` string. 
+pub fn block_reply_for(decision: &Decision) -> Value { + match decision { + Decision::Allow => json!({ "block": false }), + Decision::Deny(denial) => json!({ + "block": true, + "denial": denial, + }), + } +} From e53a71b4f2affba0d8d74a63d3cd85a115946b48 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:45:44 -0300 Subject: [PATCH 06/30] refactor(approval-gate): extract lifecycle.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second step of breaking lib.rs into focused modules. This commit lifts the persisted-record lifecycle helpers — pure Value-blob constructors and transitions, no I/O, no iii-sdk dependency — into their own module. Moved (lib.rs -> lifecycle.rs): - is_terminal_status() - build_pending_record() - transition_record() + transition_record_with_now() - collect_timed_out_for_sweep() - maybe_flip_timed_out() All public items stay re-exported from the crate root. Pure move, no behavior change. 141 approval-gate tests pass; all 8 downstream worker crates pass. --- approval-gate/src/lib.rs | 125 ++--------------------------- approval-gate/src/lifecycle.rs | 140 +++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 120 deletions(-) create mode 100644 approval-gate/src/lifecycle.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index a504162f..8594dcf1 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -3,12 +3,17 @@ //! waiting for the UI to call `approval::resolve` (or for a timeout). 
pub mod config; +pub mod lifecycle; pub mod manifest; pub mod record; pub mod rules; pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; +pub use lifecycle::{ + build_pending_record, collect_timed_out_for_sweep, is_terminal_status, maybe_flip_timed_out, + transition_record, transition_record_with_now, +}; pub use record::{Next, Record, Status}; pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, @@ -164,80 +169,6 @@ pub(crate) fn interpret_classifier_reply( } } -/// True if `status` is one of the terminal states a stitched system message -/// should be built from. `pending` and `approved` are intermediate. -pub fn is_terminal_status(status: &str) -> bool { - matches!(status, "executed" | "failed" | "denied" | "timed_out") -} - -pub fn build_pending_record( - function_call_id: &str, - function_id: &str, - args: &Value, - now_ms: u64, - timeout_ms: u64, -) -> Value { - json!({ - "function_call_id": function_call_id, - "function_id": function_id, - "args": args, - "status": "pending", - "expires_at": now_ms.saturating_add(timeout_ms), - }) -} - -/// Build a new record by transitioning a pending base record to a terminal -/// status. All terminal fields (`result`, `error`, `denial`) are optional; -/// only the ones provided are attached. Existing fields on the base -/// (including `delivered_in_turn_id` and `resolved_at` if present) are -/// preserved. The first transition into a terminal status stamps -/// `resolved_at`. -pub fn transition_record( - base: &Value, - new_status: &str, - result: Option, - error: Option, - denial: Option, -) -> Value { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - transition_record_with_now(base, new_status, result, error, denial, now_ms) -} - -/// Testable variant of [`transition_record`] that takes `now_ms` directly. 
-pub fn transition_record_with_now( - base: &Value, - new_status: &str, - result: Option, - error: Option, - denial: Option, - now_ms: u64, -) -> Value { - let mut rec = base.clone(); - if let Some(obj) = rec.as_object_mut() { - obj.insert("status".into(), Value::String(new_status.to_string())); - if let Some(r) = result { - obj.insert("result".into(), r); - } - if let Some(e) = error { - obj.insert("error".into(), Value::String(e)); - } - if let Some(d) = denial { - obj.insert( - "denial".into(), - serde_json::to_value(&d).expect("Denial is always serializable"), - ); - } - if is_terminal_status(new_status) && !obj.contains_key("resolved_at") { - obj.insert("resolved_at".into(), Value::Number(now_ms.into())); - } - } - rec -} - - pub struct Refs { pub resolve: FunctionRef, pub list_pending: FunctionRef, @@ -427,52 +358,6 @@ pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload bus.get(state_scope, &key).await.unwrap_or(Value::Null) } -/// For a bag of pending records, return the subset that have expired at -/// `now_ms` along with the metadata needed to commit the flip and notify the -/// owning session. Records without a stamped `session_id` (legacy rows -/// written before that field existed) are skipped — they'll still be picked -/// up lazily by `handle_list_undelivered` on the next read. -pub fn collect_timed_out_for_sweep( - records: &[Value], - now_ms: u64, -) -> Vec<(String, Value, String, String)> { - records - .iter() - .filter_map(|rec| { - let flipped = maybe_flip_timed_out(rec, now_ms)?; - let session_id = flipped - .get("session_id") - .and_then(Value::as_str)? - .to_string(); - let function_call_id = flipped - .get("function_call_id") - .and_then(Value::as_str)? 
- .to_string(); - if session_id.is_empty() || function_call_id.is_empty() { - return None; - } - let key = pending_key(&session_id, &function_call_id); - Some((key, flipped, session_id, function_call_id)) - }) - .collect() -} - -/// Return Some(timed_out_record) if `rec` is pending and `now_ms` is past -/// `expires_at`; otherwise None. Pure function — does not write state. -pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option { - if rec.get("status").and_then(Value::as_str) != Some("pending") { - return None; - } - let exp = rec.get("expires_at").and_then(Value::as_u64)?; - if now_ms < exp { - return None; - } - // Timeout flip carries no Denial: the `timed_out` status itself is the - // explanation. Downstream renderers (turn-orchestrator stitching, UIs) - // branch on the status, not on a redundant reason string. - Some(transition_record(rec, "timed_out", None, None, None)) -} - pub async fn handle_resolve( bus: &dyn StateBus, exec: &dyn FunctionExecutor, diff --git a/approval-gate/src/lifecycle.rs b/approval-gate/src/lifecycle.rs new file mode 100644 index 00000000..1b7c684c --- /dev/null +++ b/approval-gate/src/lifecycle.rs @@ -0,0 +1,140 @@ +//! Persisted-record lifecycle helpers. +//! +//! Pure functions that construct and transition the `Value`-blob record +//! schema as it lives in the iii state bus. No I/O, no async — the only +//! impurity is reading the system clock via [`transition_record`], whose +//! testable variant [`transition_record_with_now`] takes `now_ms` +//! directly. (Operators adopting the typed schema can read the same +//! shape via [`crate::record::Record`] / [`crate::record::Record::from_value`].) +//! +//! The wire keys (`status`, `function_call_id`, `expires_at`, +//! `resolved_at`, `result`, `error`, `denial`, `delivered_in_turn_id`) +//! are stable contract; renaming requires a state-store migration. The +//! `denial` field is documented at [`crate::wire::Denial`]. 
+ +use serde_json::{json, Value}; + +use crate::wire::{pending_key, Denial}; + +/// True if `status` is one of the terminal states a stitched system message +/// should be built from. `pending` and `approved` are intermediate. +pub fn is_terminal_status(status: &str) -> bool { + matches!(status, "executed" | "failed" | "denied" | "timed_out") +} + +/// Build a fresh pending record. `session_id` is unset here — +/// `handle_intercept` stamps it before persisting. `expires_at` is +/// `now_ms + timeout_ms`, saturating on overflow so a buggy caller +/// can't wrap the deadline around to a small value. +pub fn build_pending_record( + function_call_id: &str, + function_id: &str, + args: &Value, + now_ms: u64, + timeout_ms: u64, +) -> Value { + json!({ + "function_call_id": function_call_id, + "function_id": function_id, + "args": args, + "status": "pending", + "expires_at": now_ms.saturating_add(timeout_ms), + }) +} + +/// Build a new record by transitioning a pending base record to a terminal +/// status. All terminal fields (`result`, `error`, `denial`) are optional; +/// only the ones provided are attached. Existing fields on the base +/// (including `delivered_in_turn_id` and `resolved_at` if present) are +/// preserved. The first transition into a terminal status stamps +/// `resolved_at`. +pub fn transition_record( + base: &Value, + new_status: &str, + result: Option, + error: Option, + denial: Option, +) -> Value { + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + transition_record_with_now(base, new_status, result, error, denial, now_ms) +} + +/// Testable variant of [`transition_record`] that takes `now_ms` directly. 
+pub fn transition_record_with_now( + base: &Value, + new_status: &str, + result: Option, + error: Option, + denial: Option, + now_ms: u64, +) -> Value { + let mut rec = base.clone(); + if let Some(obj) = rec.as_object_mut() { + obj.insert("status".into(), Value::String(new_status.to_string())); + if let Some(r) = result { + obj.insert("result".into(), r); + } + if let Some(e) = error { + obj.insert("error".into(), Value::String(e)); + } + if let Some(d) = denial { + obj.insert( + "denial".into(), + serde_json::to_value(&d).expect("Denial is always serializable"), + ); + } + if is_terminal_status(new_status) && !obj.contains_key("resolved_at") { + obj.insert("resolved_at".into(), Value::Number(now_ms.into())); + } + } + rec +} + +/// For a bag of pending records, return the subset that have expired at +/// `now_ms` along with the metadata needed to commit the flip and notify the +/// owning session. Records without a stamped `session_id` (legacy rows +/// written before that field existed) are skipped — they'll still be picked +/// up lazily by `handle_list_undelivered` on the next read. +pub fn collect_timed_out_for_sweep( + records: &[Value], + now_ms: u64, +) -> Vec<(String, Value, String, String)> { + records + .iter() + .filter_map(|rec| { + let flipped = maybe_flip_timed_out(rec, now_ms)?; + let session_id = flipped + .get("session_id") + .and_then(Value::as_str)? + .to_string(); + let function_call_id = flipped + .get("function_call_id") + .and_then(Value::as_str)? + .to_string(); + if session_id.is_empty() || function_call_id.is_empty() { + return None; + } + let key = pending_key(&session_id, &function_call_id); + Some((key, flipped, session_id, function_call_id)) + }) + .collect() +} + +/// Return Some(timed_out_record) if `rec` is pending and `now_ms` is past +/// `expires_at`; otherwise None. Pure function — does not write state. 
+pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option { + if rec.get("status").and_then(Value::as_str) != Some("pending") { + return None; + } + let exp = rec.get("expires_at").and_then(Value::as_u64)?; + if now_ms < exp { + return None; + } + // Timeout flip carries no Denial: the `timed_out` status itself is the + // explanation. Downstream renderers (turn-orchestrator stitching, UIs) + // branch on the status, not on a redundant reason string. + Some(transition_record(rec, "timed_out", None, None, None)) +} From 6ca444f44f91c75fbd55c44f5c8c3099d2dd5060 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:50:02 -0300 Subject: [PATCH 07/30] refactor(approval-gate): extract state.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third step of breaking lib.rs into focused modules. This commit lifts the iii-backed state-bus and function-executor implementations into their own module, along with the `__from_approval` marker plumbing and the boot-time marker-target safety check. Moved (lib.rs -> state.rs): - StateBus trait + IiiStateBus impl (iii state::* wrapper) - FunctionExecutor trait + IiiFunctionExecutor impl (iii.trigger wrapper) - rule_for() (lookup helper used by IiiFunctionExecutor) - merge_from_approval_marker_if_needed() (marker payload composer) - unverified_marker_targets() (boot guard) The StateBus / FunctionExecutor traits are kept exactly as they were in lib.rs — they exist purely as test seams so unit tests can swap in InMemoryStateBus / FakeExecutor. No new abstractions added. Public items stay re-exported from the crate root. Pure move, no behavior change. 141 approval-gate tests pass; all 8 downstream worker crates pass (turn-orchestrator's pre-existing dual_write flake notwithstanding). 
--- approval-gate/src/lib.rs | 152 ++----------------------------- approval-gate/src/state.rs | 179 +++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 145 deletions(-) create mode 100644 approval-gate/src/state.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 8594dcf1..238ccf50 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -7,6 +7,7 @@ pub mod lifecycle; pub mod manifest; pub mod record; pub mod rules; +pub mod state; pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; @@ -15,9 +16,15 @@ pub use lifecycle::{ transition_record, transition_record_with_now, }; pub use record::{Next, Record, Status}; +pub use state::{ + unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, +}; pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; +use state::rule_for; +#[cfg(test)] +use state::merge_from_approval_marker_if_needed; use std::sync::{Arc, RwLock}; @@ -37,10 +44,6 @@ pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; /// Default `approval_state_scope` (matches [`WorkerConfig::default`]). pub const STATE_SCOPE: &str = "approvals"; -fn rule_for<'a>(rules: &'a [InterceptorRule], function_id: &str) -> Option<&'a InterceptorRule> { - rules.iter().find(|r| r.function_id == function_id) -} - /// What the subscriber should do with an incoming call. Decided by the /// matching interceptor rule (authoritative) with a fallback to the run's /// `approval_required` list when no rule exists. 
@@ -79,32 +82,6 @@ pub(crate) fn decide_intercept_action( } } -fn merge_from_approval_marker_if_needed( - inject: bool, - args: Value, - function_call_id: &str, - session_id: &str, -) -> Value { - if !inject { - return args; - } - let marker = json!({ - "call_id": function_call_id, - "session_id": session_id, - }); - match args { - Value::Object(mut m) => { - m.insert("__from_approval".into(), marker); - Value::Object(m) - } - other if other.is_null() => json!({ "__from_approval": marker }), - other => json!({ - "payload": other, - "__from_approval": marker, - }), - } -} - /// Outcome of the policy-rules pre-check that runs before the per-function /// [`config::InterceptorRule`] flow. `Allow` and `Deny` short-circuit the /// subscriber with a final reply; `FallThrough` defers to the existing @@ -186,57 +163,6 @@ pub struct Refs { pub sweeper: tokio::task::JoinHandle<()>, } -#[async_trait::async_trait] -pub trait StateBus: Send + Sync { - async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError>; - async fn get(&self, scope: &str, key: &str) -> Option; - async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec; -} - -/// Invokes an iii function with arguments and returns its result or an error -/// string. Abstracted so tests can stub the underlying call. -#[async_trait::async_trait] -pub trait FunctionExecutor: Send + Sync { - async fn invoke( - &self, - function_id: &str, - args: Value, - function_call_id: &str, - session_id: &str, - ) -> Result; -} - -/// Production [`FunctionExecutor`] backed by `iii.trigger`. 
-pub struct IiiFunctionExecutor { - pub iii: III, - pub rules: Arc>, -} - -#[async_trait::async_trait] -impl FunctionExecutor for IiiFunctionExecutor { - async fn invoke( - &self, - function_id: &str, - args: Value, - function_call_id: &str, - session_id: &str, - ) -> Result { - let inject = - rule_for(self.rules.as_slice(), function_id).is_some_and(|r| r.inject_approval_marker); - let payload = - merge_from_approval_marker_if_needed(inject, args, function_call_id, session_id); - self.iii - .trigger(TriggerRequest { - function_id: function_id.to_string(), - payload, - action: None, - timeout_ms: None, - }) - .await - .map_err(|e| e.to_string()) - } -} - /// Decide whether a call is gated; if so, write a pending record and return /// the structured pending hook reply. If not gated, return `{block: false}` /// and do nothing. @@ -996,70 +922,6 @@ async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &str, reply: & .await; } -/// Production [`StateBus`] backed by a real iii-sdk [`III`] connection. 
-pub struct IiiStateBus(pub III); - -#[async_trait::async_trait] -impl StateBus for IiiStateBus { - async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError> { - self.0 - .trigger(TriggerRequest { - function_id: "state::set".into(), - payload: json!({ "scope": scope, "key": key, "value": value }), - action: None, - timeout_ms: None, - }) - .await - .map(|_| ()) - } - async fn get(&self, scope: &str, key: &str) -> Option { - self.0 - .trigger(TriggerRequest { - function_id: "state::get".into(), - payload: json!({ "scope": scope, "key": key }), - action: None, - timeout_ms: None, - }) - .await - .ok() - .filter(|v| !v.is_null()) - } - async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { - let resp = self - .0 - .trigger(TriggerRequest { - function_id: "state::list".into(), - payload: json!({ "scope": scope, "prefix": prefix }), - action: None, - timeout_ms: None, - }) - .await - .unwrap_or_else(|_| json!({ "items": [] })); - // Engine may return either {"items": [...]} or a plain Array. - if let Some(arr) = resp.as_array() { - return arr.clone(); - } - resp.get("items") - .and_then(|v| v.as_array().cloned()) - .unwrap_or_default() - .into_iter() - .map(|entry| entry.get("value").cloned().unwrap_or(entry)) - .collect() - } -} - -/// Return the list of function ids whose interceptor asks the gate to -/// inject `__from_approval` without asserting that the target validates it. -/// Empty list ⇒ config is safe to register. Pure — exposed for tests and -/// for the boot-time check in [`register`]. 
-pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { - rules - .iter() - .filter(|r| r.inject_approval_marker && !r.marker_target_verified) - .map(|r| r.function_id.as_str()) - .collect() -} - pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let rules: Arc> = Arc::new(cfg.interceptors.clone()); // Layered policy rules consulted before the per-function interceptor diff --git a/approval-gate/src/state.rs b/approval-gate/src/state.rs new file mode 100644 index 00000000..26aa58a3 --- /dev/null +++ b/approval-gate/src/state.rs @@ -0,0 +1,179 @@ +//! State-store and function-executor traits, plus their iii-backed +//! implementations and the `__from_approval` marker plumbing. +//! +//! The traits exist purely as test seams — unit tests swap in +//! `InMemoryStateBus` / `FakeExecutor` while production code uses the +//! `Iii*` implementations that call iii directly. No new abstractions +//! beyond what's needed for that seam. + +use std::sync::Arc; + +use async_trait::async_trait; +use iii_sdk::{IIIError, TriggerRequest, III}; +use serde_json::{json, Value}; + +use crate::config::InterceptorRule; + +/// Look up the [`InterceptorRule`] for `function_id`, if one is configured. +/// Pure helper; no I/O. Used by the gate's intercept flow and by the +/// production [`IiiFunctionExecutor`] to decide whether to inject the +/// `__from_approval` marker. +pub(crate) fn rule_for<'a>( + rules: &'a [InterceptorRule], + function_id: &str, +) -> Option<&'a InterceptorRule> { + rules.iter().find(|r| r.function_id == function_id) +} + +/// Stamp the `__from_approval` marker onto a function call's args when the +/// rule asks for it. The marker carries `{ call_id, session_id }` so the +/// target function can validate the call came through approval-gate (via +/// `approval::lookup_record`) instead of via direct trigger bypass. +/// +/// Idempotent on shape: object args get the marker merged in; null args +/// become `{ __from_approval: ... 
}`; any other shape (array, scalar) +/// gets wrapped as `{ payload, __from_approval: ... }` so it stays +/// recoverable on the target side. +pub(crate) fn merge_from_approval_marker_if_needed( + inject: bool, + args: Value, + function_call_id: &str, + session_id: &str, +) -> Value { + if !inject { + return args; + } + let marker = json!({ + "call_id": function_call_id, + "session_id": session_id, + }); + match args { + Value::Object(mut m) => { + m.insert("__from_approval".into(), marker); + Value::Object(m) + } + other if other.is_null() => json!({ "__from_approval": marker }), + other => json!({ + "payload": other, + "__from_approval": marker, + }), + } +} + +/// Abstraction over the iii state bus — the kv layer where pending and +/// resolved approval records live. Exists so unit tests can swap in a +/// `BTreeMap`-backed fake; production uses [`IiiStateBus`]. +#[async_trait] +pub trait StateBus: Send + Sync { + async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), IIIError>; + async fn get(&self, scope: &str, key: &str) -> Option; + async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec; +} + +/// Invokes an iii function with arguments and returns its result or an +/// error string. Abstracted so tests can stub the underlying call. +#[async_trait] +pub trait FunctionExecutor: Send + Sync { + async fn invoke( + &self, + function_id: &str, + args: Value, + function_call_id: &str, + session_id: &str, + ) -> Result; +} + +/// Production [`FunctionExecutor`] backed by `iii.trigger`. 
+pub struct IiiFunctionExecutor { + pub iii: III, + pub rules: Arc>, +} + +#[async_trait] +impl FunctionExecutor for IiiFunctionExecutor { + async fn invoke( + &self, + function_id: &str, + args: Value, + function_call_id: &str, + session_id: &str, + ) -> Result { + let inject = + rule_for(self.rules.as_slice(), function_id).is_some_and(|r| r.inject_approval_marker); + let payload = + merge_from_approval_marker_if_needed(inject, args, function_call_id, session_id); + self.iii + .trigger(TriggerRequest { + function_id: function_id.to_string(), + payload, + action: None, + timeout_ms: None, + }) + .await + .map_err(|e| e.to_string()) + } +} + +/// Production [`StateBus`] backed by iii's `state::*` builtins. +pub struct IiiStateBus(pub III); + +#[async_trait] +impl StateBus for IiiStateBus { + async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), IIIError> { + self.0 + .trigger(TriggerRequest { + function_id: "state::set".into(), + payload: json!({ "scope": scope, "key": key, "value": value }), + action: None, + timeout_ms: None, + }) + .await + .map(|_| ()) + } + async fn get(&self, scope: &str, key: &str) -> Option { + self.0 + .trigger(TriggerRequest { + function_id: "state::get".into(), + payload: json!({ "scope": scope, "key": key }), + action: None, + timeout_ms: None, + }) + .await + .ok() + .filter(|v| !v.is_null()) + } + async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { + let resp = self + .0 + .trigger(TriggerRequest { + function_id: "state::list".into(), + payload: json!({ "scope": scope, "prefix": prefix }), + action: None, + timeout_ms: None, + }) + .await + .unwrap_or_else(|_| json!({ "items": [] })); + // Engine may return either {"items": [...]} or a plain Array. 
+ if let Some(arr) = resp.as_array() { + return arr.clone(); + } + resp.get("items") + .and_then(|v| v.as_array().cloned()) + .unwrap_or_default() + .into_iter() + .map(|entry| entry.get("value").cloned().unwrap_or(entry)) + .collect() + } +} + +/// Return the list of function ids whose interceptor asks the gate to +/// inject `__from_approval` without asserting that the target validates it. +/// Empty list ⇒ config is safe to register. Pure — exposed for tests and +/// for the boot-time check in `register`. +pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { + rules + .iter() + .filter(|r| r.inject_approval_marker && !r.marker_target_verified) + .map(|r| r.function_id.as_str()) + .collect() +} From b2fa112f32fa9e4314da79d0d372412f38a296f8 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 19:57:17 -0300 Subject: [PATCH 08/30] refactor(approval-gate): extract intercept.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth step of breaking lib.rs into focused modules. This commit lifts the intercept decision flow into its own module — the three layers that decide what the gate does with an incoming function call: policy rules, per-function interceptor rules, and classifier replies, plus the async handle_intercept that writes the pending record. Moved (lib.rs -> intercept.rs): - InterceptAction enum + decide_intercept_action() - PolicyOutcome enum + apply_policy_rules() - ClassifierDecision enum + interpret_classifier_reply() - handle_intercept() handle_intercept stays public (re-exported from the crate root). The pub(crate) helpers stay pub(crate) and are imported back into lib.rs for the subscriber closure in register(). Pure move, no behavior change. 141 approval-gate tests pass; turn-orchestrator + harness + shell verified green. 
--- approval-gate/src/intercept.rs | 237 +++++++++++++++++++++++++++++++++ approval-gate/src/lib.rs | 213 +---------------------------- 2 files changed, 244 insertions(+), 206 deletions(-) create mode 100644 approval-gate/src/intercept.rs diff --git a/approval-gate/src/intercept.rs b/approval-gate/src/intercept.rs new file mode 100644 index 00000000..ef0e4663 --- /dev/null +++ b/approval-gate/src/intercept.rs @@ -0,0 +1,237 @@ +//! Intercept decision flow. +//! +//! Pure decision helpers + the async [`handle_intercept`] that writes +//! the pending record. Together they answer the question every hook +//! event triggers: "what should the gate do with this function call?" +//! +//! Three layers run, in order: +//! +//! 1. **Policy rules** ([`apply_policy_rules`]) — operator-configured +//! layered ruleset. `Allow` and `Deny` short-circuit; `Ask` (and +//! no-match) falls through. +//! 2. **Interceptor rule** ([`decide_intercept_action`]) — per-function +//! config. Decides between `Pass`, `Pause` (no classifier), and +//! `Classify { classifier_fn, … }`. +//! 3. **Classifier reply** ([`interpret_classifier_reply`]) — parses the +//! classifier function's JSON response and maps it back to either an +//! immediate `Auto` (pass), an immediate `Deny`, or `Ask` (fall back +//! to user prompt via `handle_intercept`). +//! +//! This module owns only the decision types and `handle_intercept`. The +//! wiring (closure body in `register`) lives in `register.rs`. + +use serde_json::{json, Value}; + +use crate::config::InterceptorRule; +use crate::lifecycle::{build_pending_record, is_terminal_status}; +use crate::rules; +use crate::state::StateBus; +use crate::wire::{pending_key, Denial, IncomingCall}; + +/// What the subscriber should do with an incoming call. Decided by the +/// matching interceptor rule (authoritative) with a fallback to the run's +/// `approval_required` list when no rule exists. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum InterceptAction { + /// No rule, no `approval_required` listing — let the call through. + Pass, + /// Pause and create a pending record; no classifier consulted. + Pause, + /// Run the classifier first; on `ask`, pause; on `auto`, pass; on `deny`, block. + Classify { + classifier_fn: String, + classifier_timeout_ms: u64, + }, +} + +/// Pure decision: given a matching rule (or none) and whether the run +/// explicitly listed this function id in `approval_required`, what should +/// the subscriber do? Interceptor rules are authoritative — an operator +/// who registered a rule meant for every call to go through it, regardless +/// of per-run opt-in. +pub(crate) fn decide_intercept_action( + rule: Option<&InterceptorRule>, + requires_approval: bool, +) -> InterceptAction { + match rule { + Some(r) if r.classifier.as_ref().is_some_and(|s| !s.is_empty()) => { + InterceptAction::Classify { + classifier_fn: r.classifier.clone().unwrap(), + classifier_timeout_ms: r.classifier_timeout_ms, + } + } + Some(_) => InterceptAction::Pause, + None if requires_approval => InterceptAction::Pause, + None => InterceptAction::Pass, + } +} + +/// Outcome of the policy-rules pre-check that runs before the per-function +/// [`InterceptorRule`] flow. `Allow` and `Deny` short-circuit the +/// subscriber with a final reply; `FallThrough` defers to the existing +/// interceptor logic (classifier or pause). +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum PolicyOutcome { + Allow, + Deny { + rule_permission: String, + rule_pattern: String, + }, + FallThrough, +} + +/// Apply the layered policy rules to an incoming function id. Pure +/// function — no I/O, no clock. Extracted from the subscriber closure +/// so the decision branch can be unit-tested independently. 
+pub(crate) fn apply_policy_rules( + rules: &rules::Ruleset, + function_id: &str, +) -> PolicyOutcome { + match rules::evaluate(function_id, "*", rules) { + Some(rule) => match rule.action { + rules::Action::Allow => PolicyOutcome::Allow, + rules::Action::Deny => PolicyOutcome::Deny { + rule_permission: rule.permission.clone(), + rule_pattern: rule.pattern.clone(), + }, + rules::Action::Ask => PolicyOutcome::FallThrough, + }, + None => PolicyOutcome::FallThrough, + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum ClassifierDecision { + Auto, + Deny(Denial), + Ask, +} + +/// Parse classifier JSON (`decision` tag: auto | deny | ask). On `deny` +/// the reply may carry `reason` (free-form classifier text); both that +/// and the calling `classifier_fn` get folded into a [`Denial::Policy`]. +pub(crate) fn interpret_classifier_reply( + value: &Value, + classifier_fn: &str, +) -> Result { + let tag = value.get("decision").and_then(Value::as_str).ok_or(())?; + match tag { + "auto" => Ok(ClassifierDecision::Auto), + "deny" => { + let classifier_reason = value + .get("reason") + .and_then(Value::as_str) + .unwrap_or("denied") + .to_string(); + Ok(ClassifierDecision::Deny(Denial::Policy { + classifier_reason, + classifier_fn: classifier_fn.to_string(), + })) + } + "ask" => Ok(ClassifierDecision::Ask), + _ => Err(()), + } +} + +/// Decide whether a call is gated; if so, write a pending record and return +/// the structured pending hook reply. If not gated, return `{block: false}` +/// and do nothing. +/// +/// Stamps `session_id` onto the persisted record so the timeout sweeper can +/// emit `approval_resolved` to the right session stream without consulting +/// the storage layer's keys. +/// +/// State-write failure is treated as fail-closed: the gate replies +/// `{block:true, status:"denied"}` so a transient kv outage cannot silently +/// bypass an approval check. 
+pub async fn handle_intercept( + bus: &dyn StateBus, + state_scope: &str, + call: &IncomingCall, + now_ms: u64, + timeout_ms: u64, + force_pending: bool, +) -> Value { + if !force_pending && !call.requires_approval() { + return json!({ "block": false }); + } + + // Defense in depth: if a record for this (session, call_id) already + // exists, don't blow it away. Re-intercept of an already-decided call + // would otherwise revert a terminal record back to `pending`, losing + // the audit trail and any `delivered_in_turn_id` stamp. + let key = pending_key(&call.session_id, &call.function_call_id); + if let Some(existing) = bus.get(state_scope, &key).await { + let status = existing + .get("status") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + if is_terminal_status(&status) { + // Replay of an already-resolved call: the prior status carries + // the meaning. No fresh Denial is synthesized — consumers that + // need to render the historical decision read the persisted + // record via approval::lookup_record. + return json!({ + "block": true, + "status": status, + "replay": "already_resolved", + "call_id": call.function_call_id, + "function_id": call.function_id, + }); + } + if status == "pending" || status == "approved" { + // Replay of an in-flight intercept — keep the existing row, + // re-emit the pending reply. No state churn. 
+ return json!({ + "block": true, + "status": "pending", + "replay": "in_flight", + "call_id": call.function_call_id, + "function_id": call.function_id, + }); + } + } + + let mut record = build_pending_record( + &call.function_call_id, + &call.function_id, + &call.args, + now_ms, + timeout_ms, + ); + if let Some(obj) = record.as_object_mut() { + obj.insert("session_id".into(), Value::String(call.session_id.clone())); + } + if let Err(err) = bus + .set( + state_scope, + &pending_key(&call.session_id, &call.function_call_id), + record, + ) + .await + { + tracing::error!( + "approval-gate: failed to write pending record for {}/{}: {err} — failing closed", + call.session_id, + call.function_call_id + ); + let denial = Denial::StateError { + phase: "intercept_write_pending".to_string(), + error: err.to_string(), + }; + return json!({ + "block": true, + "denial": denial, + "status": "denied", + "call_id": call.function_call_id, + "function_id": call.function_id, + }); + } + json!({ + "block": true, + "status": "pending", + "call_id": call.function_call_id, + "function_id": call.function_id, + }) +} diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 238ccf50..b5b10b7f 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -3,6 +3,7 @@ //! waiting for the UI to call `approval::resolve` (or for a timeout). 
pub mod config; +pub mod intercept; pub mod lifecycle; pub mod manifest; pub mod record; @@ -11,6 +12,7 @@ pub mod state; pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; +pub use intercept::handle_intercept; pub use lifecycle::{ build_pending_record, collect_timed_out_for_sweep, is_terminal_status, maybe_flip_timed_out, transition_record, transition_record_with_now, @@ -22,6 +24,11 @@ pub use state::{ pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; + +use intercept::{ + apply_policy_rules, decide_intercept_action, interpret_classifier_reply, ClassifierDecision, + InterceptAction, PolicyOutcome, +}; use state::rule_for; #[cfg(test)] use state::merge_from_approval_marker_if_needed; @@ -44,108 +51,6 @@ pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; /// Default `approval_state_scope` (matches [`WorkerConfig::default`]). pub const STATE_SCOPE: &str = "approvals"; -/// What the subscriber should do with an incoming call. Decided by the -/// matching interceptor rule (authoritative) with a fallback to the run's -/// `approval_required` list when no rule exists. -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum InterceptAction { - /// No rule, no `approval_required` listing — let the call through. - Pass, - /// Pause and create a pending record; no classifier consulted. - Pause, - /// Run the classifier first; on `ask`, pause; on `auto`, pass; on `deny`, block. - Classify { - classifier_fn: String, - classifier_timeout_ms: u64, - }, -} - -/// Pure decision: given a matching rule (or none) and whether the run -/// explicitly listed this function id in `approval_required`, what should -/// the subscriber do? Interceptor rules are authoritative — an operator -/// who registered a rule meant for every call to go through it, regardless -/// of per-run opt-in. 
-pub(crate) fn decide_intercept_action( - rule: Option<&InterceptorRule>, - requires_approval: bool, -) -> InterceptAction { - match rule { - Some(r) if r.classifier.as_ref().is_some_and(|s| !s.is_empty()) => { - InterceptAction::Classify { - classifier_fn: r.classifier.clone().unwrap(), - classifier_timeout_ms: r.classifier_timeout_ms, - } - } - Some(_) => InterceptAction::Pause, - None if requires_approval => InterceptAction::Pause, - None => InterceptAction::Pass, - } -} - -/// Outcome of the policy-rules pre-check that runs before the per-function -/// [`config::InterceptorRule`] flow. `Allow` and `Deny` short-circuit the -/// subscriber with a final reply; `FallThrough` defers to the existing -/// interceptor logic (classifier or pause). -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum PolicyOutcome { - Allow, - Deny { - rule_permission: String, - rule_pattern: String, - }, - FallThrough, -} - -/// Apply the layered policy rules to an incoming function id. Pure -/// function — no I/O, no clock. Extracted from [`register`]'s subscriber -/// closure so the decision branch can be unit-tested independently. -pub(crate) fn apply_policy_rules(rules: &rules::Ruleset, function_id: &str) -> PolicyOutcome { - match rules::evaluate(function_id, "*", rules) { - Some(rule) => match rule.action { - rules::Action::Allow => PolicyOutcome::Allow, - rules::Action::Deny => PolicyOutcome::Deny { - rule_permission: rule.permission.clone(), - rule_pattern: rule.pattern.clone(), - }, - rules::Action::Ask => PolicyOutcome::FallThrough, - }, - None => PolicyOutcome::FallThrough, - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum ClassifierDecision { - Auto, - Deny(Denial), - Ask, -} - -/// Parse classifier JSON (`decision` tag: auto | deny | ask). On `deny` -/// the reply may carry `reason` (free-form classifier text) and optionally -/// `classifier_fn` — both get folded into a [`Denial::Policy`]. 
-pub(crate) fn interpret_classifier_reply( - value: &Value, - classifier_fn: &str, -) -> Result { - let tag = value.get("decision").and_then(Value::as_str).ok_or(())?; - match tag { - "auto" => Ok(ClassifierDecision::Auto), - "deny" => { - let classifier_reason = value - .get("reason") - .and_then(Value::as_str) - .unwrap_or("denied") - .to_string(); - Ok(ClassifierDecision::Deny(Denial::Policy { - classifier_reason, - classifier_fn: classifier_fn.to_string(), - })) - } - "ask" => Ok(ClassifierDecision::Ask), - _ => Err(()), - } -} - pub struct Refs { pub resolve: FunctionRef, pub list_pending: FunctionRef, @@ -163,110 +68,6 @@ pub struct Refs { pub sweeper: tokio::task::JoinHandle<()>, } -/// Decide whether a call is gated; if so, write a pending record and return -/// the structured pending hook reply. If not gated, return `{block: false}` -/// and do nothing. -/// -/// Stamps `session_id` onto the persisted record so the timeout sweeper can -/// emit `approval_resolved` to the right session stream without consulting -/// the storage layer's keys. -/// -/// State-write failure is treated as fail-closed: the gate replies -/// `{block:true, status:"denied"}` so a transient kv outage cannot silently -/// bypass an approval check. -pub async fn handle_intercept( - bus: &dyn StateBus, - state_scope: &str, - call: &IncomingCall, - now_ms: u64, - timeout_ms: u64, - force_pending: bool, -) -> Value { - if !force_pending && !call.requires_approval() { - return json!({ "block": false }); - } - - // Defense in depth: if a record for this (session, call_id) already - // exists, don't blow it away. Re-intercept of an already-decided call - // would otherwise revert a terminal record back to `pending`, losing - // the audit trail and any `delivered_in_turn_id` stamp. Surfaced by - // the state-machine proptest in tests::state_machine_invariants. 
- let key = pending_key(&call.session_id, &call.function_call_id); - if let Some(existing) = bus.get(state_scope, &key).await { - let status = existing - .get("status") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - if is_terminal_status(&status) { - // Replay of an already-resolved call: the prior status carries - // the meaning. No fresh Denial is synthesized — consumers that - // need to render the historical decision read the persisted - // record via approval::lookup_record. - return json!({ - "block": true, - "status": status, - "replay": "already_resolved", - "call_id": call.function_call_id, - "function_id": call.function_id, - }); - } - if status == "pending" || status == "approved" { - // Replay of an in-flight intercept — keep the existing row, - // re-emit the pending reply. No state churn. - return json!({ - "block": true, - "status": "pending", - "replay": "in_flight", - "call_id": call.function_call_id, - "function_id": call.function_id, - }); - } - } - - let mut record = build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - now_ms, - timeout_ms, - ); - if let Some(obj) = record.as_object_mut() { - obj.insert("session_id".into(), Value::String(call.session_id.clone())); - } - if let Err(err) = bus - .set( - state_scope, - &pending_key(&call.session_id, &call.function_call_id), - record, - ) - .await - { - tracing::error!( - "approval-gate: failed to write pending record for {}/{}: {err} — failing closed", - call.session_id, - call.function_call_id - ); - let denial = Denial::StateError { - phase: "intercept_write_pending".to_string(), - error: err.to_string(), - }; - return json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, - "function_id": call.function_id, - }); - } - json!({ - "block": true, - "status": "pending", - "call_id": call.function_call_id, - "function_id": call.function_id, - }) -} - /// Lookup a single approval record by session + call id 
(for shell bypass validation). pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { let session_id = payload From ff17905cbf87365fee84d5a0302fa830644d1e6a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 20:19:00 -0300 Subject: [PATCH 09/30] refactor(approval-gate): extract resolve.rs from lib.rs Fifth step of breaking lib.rs into focused modules. This commit lifts the approval::resolve flow into its own module. Moved (lib.rs -> resolve.rs): - handle_lookup_record() - handle_resolve() - cascade_allow_for_session() (the always=true sweep) - approve_and_execute() (shared by user-driven allow + cascade) handle_resolve and handle_lookup_record stay public via the crate root re-exports. approve_and_execute and cascade_allow_for_session stay private to the resolve module. Pure move, no behavior change. 141 approval-gate tests pass. --- approval-gate/src/lib.rs | 271 +-------------------------------- approval-gate/src/resolve.rs | 287 +++++++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+), 268 deletions(-) create mode 100644 approval-gate/src/resolve.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index b5b10b7f..cfba72a6 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -7,6 +7,7 @@ pub mod intercept; pub mod lifecycle; pub mod manifest; pub mod record; +pub mod resolve; pub mod rules; pub mod state; pub mod wire; @@ -18,6 +19,7 @@ pub use lifecycle::{ transition_record, transition_record_with_now, }; pub use record::{Next, Record, Status}; +pub use resolve::{handle_lookup_record, handle_resolve}; pub use state::{ unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, }; @@ -25,10 +27,7 @@ pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; -use intercept::{ - apply_policy_rules, decide_intercept_action, interpret_classifier_reply, ClassifierDecision, - 
InterceptAction, PolicyOutcome, -}; +use intercept::{decide_intercept_action, interpret_classifier_reply, ClassifierDecision, InterceptAction, PolicyOutcome, apply_policy_rules}; use state::rule_for; #[cfg(test)] use state::merge_from_approval_marker_if_needed; @@ -68,270 +67,6 @@ pub struct Refs { pub sweeper: tokio::task::JoinHandle<()>, } -/// Lookup a single approval record by session + call id (for shell bypass validation). -pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let function_call_id = payload - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() || function_call_id.is_empty() { - return Value::Null; - } - let key = pending_key(session_id, function_call_id); - bus.get(state_scope, &key).await.unwrap_or(Value::Null) -} - -pub async fn handle_resolve( - bus: &dyn StateBus, - exec: &dyn FunctionExecutor, - state_scope: &str, - policy_rules: &RwLock, - payload: Value, - now_ms: u64, -) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let function_call_id = payload - .get("function_call_id") - .or_else(|| payload.get("tool_call_id")) - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() || function_call_id.is_empty() { - return json!({ "ok": false, "error": "missing_id" }); - } - let decision: WireDecision = match payload.get("decision").cloned() { - Some(v) => match serde_json::from_value(v) { - Ok(d) => d, - Err(_) => return json!({ "ok": false, "error": "bad_decision" }), - }, - None => return json!({ "ok": false, "error": "bad_decision" }), - }; - let key = pending_key(session_id, function_call_id); - let Some(existing) = bus.get(state_scope, &key).await else { - return json!({ "ok": false, "error": "not_found" }); - }; - - // Lazy timeout flip (covered by Task 7 tests). 
- let existing = match maybe_flip_timed_out(&existing, now_ms) { - Some(flipped) => { - let _ = bus.set(state_scope, &key, flipped.clone()).await; - return json!({ "ok": false, "error": "timed_out" }); - } - None => existing, - }; - - if existing.get("status").and_then(Value::as_str) != Some("pending") { - return json!({ "ok": false, "error": "already_resolved" }); - } - - match decision { - WireDecision::Deny => { - // Caller supplies a structured Denial. Accepted shapes: - // { "decision": "deny", "denial": { "kind": "user_rejected", ... } } - // { "decision": "deny", "denial": { "kind": "user_corrected", "detail": { "feedback": "..." } } } - // Missing `denial` is treated as a bare UserRejected (no feedback) - // so the simplest UI flow stays one-click. - let denial = match payload.get("denial").cloned() { - Some(v) => match serde_json::from_value::(v) { - Ok(d) => d, - Err(_) => return json!({ "ok": false, "error": "bad_denial" }), - }, - None => Denial::UserRejected, - }; - let denied = transition_record(&existing, "denied", None, None, Some(denial)); - if let Err(e) = bus.set(state_scope, &key, denied).await { - tracing::error!("approval-gate: failed to write denied record: {e}"); - return json!({ "ok": false, "error": "state_write_failed" }); - } - json!({ "ok": true }) - } - WireDecision::Allow => { - if let Err(err) = approve_and_execute( - bus, - exec, - state_scope, - &existing, - session_id, - function_call_id, - ) - .await - { - tracing::error!( - "approval-gate: failed to execute approved call: {err}" - ); - return json!({ "ok": false, "error": "state_write_failed" }); - } - - // Optional cascade: when `always: true` is set on an allow - // reply, add a runtime Allow rule for this call's function id - // and resolve every other pending record in the same session - // that the new rule covers. v1 scope is function-id-only — - // the cascade rule's `pattern` is "*" to match the v1 rules - // surface. See [`crate::rules`]. 
- let cascaded = if payload - .get("always") - .and_then(Value::as_bool) - .unwrap_or(false) - { - let function_id = existing - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - cascade_allow_for_session( - bus, - exec, - state_scope, - policy_rules, - session_id, - function_call_id, - &function_id, - ) - .await - } else { - 0 - }; - - if cascaded > 0 { - json!({ "ok": true, "cascaded": cascaded }) - } else { - json!({ "ok": true }) - } - } - } -} - -/// Push an Allow rule for `function_id` into the shared policy ruleset, -/// then resolve every pending record in `session_id` (other than the one -/// just resolved by the caller) that the new rule covers. Returns the -/// number of records auto-resolved. -/// -/// The function id rule is appended once; if the user clicks "always -/// allow X" twice for the same X within a session, the second push is a -/// duplicate but harmless (last-wins still picks Allow). State-write -/// failures inside the loop are logged and skipped so a single bad -/// record can't prevent the rest of the cascade. -async fn cascade_allow_for_session( - bus: &dyn StateBus, - exec: &dyn FunctionExecutor, - state_scope: &str, - policy_rules: &RwLock, - session_id: &str, - originator_call_id: &str, - originator_function_id: &str, -) -> u64 { - // Push the new Allow rule under the write lock. Hold the guard only - // for the mutation, not across the .await in the sweep below. - { - let mut guard = policy_rules - .write() - .expect("approval-gate policy rules lock poisoned"); - guard.push(rules::Rule { - permission: originator_function_id.to_string(), - pattern: "*".to_string(), - action: rules::Action::Allow, - }); - } - - // Snapshot the session's pending records and re-evaluate each one - // against the now-updated rules. Use a read-clone so we don't hold - // the lock across .await. 
- let prefix = format!("{session_id}/"); - let session_records = bus.list_prefix(state_scope, &prefix).await; - let mut cascaded = 0u64; - for rec in session_records { - let rec_call_id = match rec.get("function_call_id").and_then(Value::as_str) { - Some(s) => s.to_string(), - None => continue, - }; - if rec_call_id == originator_call_id { - continue; - } - if rec.get("status").and_then(Value::as_str) != Some("pending") { - continue; - } - let fn_id = rec - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - let outcome = { - let guard = policy_rules - .read() - .expect("approval-gate policy rules lock poisoned"); - apply_policy_rules(&guard, &fn_id) - }; - if !matches!(outcome, PolicyOutcome::Allow) { - continue; - } - if let Err(err) = - approve_and_execute(bus, exec, state_scope, &rec, session_id, &rec_call_id).await - { - tracing::warn!( - session_id, - call_id = %rec_call_id, - "approval-gate: cascade auto-resolve failed: {err}" - ); - continue; - } - cascaded += 1; - } - cascaded -} - -/// Drive a pending record through the approved → invoke → executed/failed -/// flow. Pure plumbing — does not consult policy rules, does not check -/// the original status (caller must have verified it's pending). Used by -/// both the user-driven [`handle_resolve`] allow path and the -/// cascade-on-`always` sweep so the state transitions stay in one place. -/// -/// Returns `Err` only when a state write fails; the invocation result -/// itself (success or function-error) is captured on the record. The -/// caller decides how to surface a state-write failure (the existing -/// handlers map it to `{ok:false, error:"state_write_failed"}`). 
-pub(crate) async fn approve_and_execute( - bus: &dyn StateBus, - exec: &dyn FunctionExecutor, - state_scope: &str, - pending: &Value, - session_id: &str, - function_call_id: &str, -) -> Result<(), String> { - let function_id = pending - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - let args = pending.get("args").cloned().unwrap_or(json!({})); - let key = pending_key(session_id, function_call_id); - let approved = transition_record(pending, "approved", None, None, None); - // Best-effort intermediate write; if it fails we still try to invoke - // so the user-visible behavior matches the pre-extraction allow path. - let _ = bus.set(state_scope, &key, approved.clone()).await; - match exec - .invoke(&function_id, args, function_call_id, session_id) - .await - { - Ok(result) => { - let executed = transition_record(&approved, "executed", Some(result), None, None); - bus.set(state_scope, &key, executed) - .await - .map_err(|e| e.to_string()) - } - Err(error) => { - let failed = transition_record(&approved, "failed", None, Some(error), None); - bus.set(state_scope, &key, failed) - .await - .map_err(|e| e.to_string()) - } - } -} pub async fn handle_list_pending(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { let session_id = payload diff --git a/approval-gate/src/resolve.rs b/approval-gate/src/resolve.rs new file mode 100644 index 00000000..7506bcd3 --- /dev/null +++ b/approval-gate/src/resolve.rs @@ -0,0 +1,287 @@ +//! Resolve flow — handles `approval::resolve` and the cascading-allow +//! behavior that fires when a reply carries `always: true`. +//! +//! [`handle_resolve`] is the main entry point. On allow it routes +//! through [`approve_and_execute`], which is also reused by the cascade +//! sweep ([`cascade_allow_for_session`]) so the approved → invoke → +//! executed/failed transitions stay in one place. [`handle_lookup_record`] +//! is the small read-only helper called by shell bypass validation. 
+ +use std::sync::RwLock; + +use serde_json::{json, Value}; + +use crate::intercept::{apply_policy_rules, PolicyOutcome}; +use crate::lifecycle::{maybe_flip_timed_out, transition_record}; +use crate::rules; +use crate::state::{FunctionExecutor, StateBus}; +use crate::wire::{pending_key, Denial, WireDecision}; + +/// Lookup a single approval record by session + call id (for shell bypass validation). +pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let function_call_id = payload + .get("function_call_id") + .and_then(Value::as_str) + .unwrap_or(""); + if session_id.is_empty() || function_call_id.is_empty() { + return Value::Null; + } + let key = pending_key(session_id, function_call_id); + bus.get(state_scope, &key).await.unwrap_or(Value::Null) +} + +/// Resolve a pending approval. Wire-format errors return `{ok: false, +/// error: ""}`. Success returns `{ok: true}` plus an optional +/// `cascaded: N` count when an `always: true` reply triggered the +/// session sweep. 
+pub async fn handle_resolve( + bus: &dyn StateBus, + exec: &dyn FunctionExecutor, + state_scope: &str, + policy_rules: &RwLock, + payload: Value, + now_ms: u64, +) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let function_call_id = payload + .get("function_call_id") + .or_else(|| payload.get("tool_call_id")) + .and_then(Value::as_str) + .unwrap_or(""); + if session_id.is_empty() || function_call_id.is_empty() { + return json!({ "ok": false, "error": "missing_id" }); + } + let decision: WireDecision = match payload.get("decision").cloned() { + Some(v) => match serde_json::from_value(v) { + Ok(d) => d, + Err(_) => return json!({ "ok": false, "error": "bad_decision" }), + }, + None => return json!({ "ok": false, "error": "bad_decision" }), + }; + let key = pending_key(session_id, function_call_id); + let Some(existing) = bus.get(state_scope, &key).await else { + return json!({ "ok": false, "error": "not_found" }); + }; + + // Lazy timeout flip: if the record is past expires_at, write the + // timed_out transition and refuse the resolve so the caller can't + // race the sweeper. + let existing = match maybe_flip_timed_out(&existing, now_ms) { + Some(flipped) => { + let _ = bus.set(state_scope, &key, flipped.clone()).await; + return json!({ "ok": false, "error": "timed_out" }); + } + None => existing, + }; + + if existing.get("status").and_then(Value::as_str) != Some("pending") { + return json!({ "ok": false, "error": "already_resolved" }); + } + + match decision { + WireDecision::Deny => { + // Caller supplies a structured Denial. Accepted shapes: + // { "decision": "deny", "denial": { "kind": "user_rejected", ... } } + // { "decision": "deny", "denial": { "kind": "user_corrected", "detail": { "feedback": "..." } } } + // Missing `denial` is treated as a bare UserRejected (no feedback) + // so the simplest UI flow stays one-click. 
+ let denial = match payload.get("denial").cloned() { + Some(v) => match serde_json::from_value::(v) { + Ok(d) => d, + Err(_) => return json!({ "ok": false, "error": "bad_denial" }), + }, + None => Denial::UserRejected, + }; + let denied = transition_record(&existing, "denied", None, None, Some(denial)); + if let Err(e) = bus.set(state_scope, &key, denied).await { + tracing::error!("approval-gate: failed to write denied record: {e}"); + return json!({ "ok": false, "error": "state_write_failed" }); + } + json!({ "ok": true }) + } + WireDecision::Allow => { + if let Err(err) = approve_and_execute( + bus, + exec, + state_scope, + &existing, + session_id, + function_call_id, + ) + .await + { + tracing::error!("approval-gate: failed to execute approved call: {err}"); + return json!({ "ok": false, "error": "state_write_failed" }); + } + + // Optional cascade: when `always: true` is set on an allow + // reply, add a runtime Allow rule for this call's function id + // and resolve every other pending record in the same session + // that the new rule covers. v1 scope is function-id-only — + // the cascade rule's `pattern` is "*" to match the v1 rules + // surface. See [`crate::rules`]. + let cascaded = if payload + .get("always") + .and_then(Value::as_bool) + .unwrap_or(false) + { + let function_id = existing + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + cascade_allow_for_session( + bus, + exec, + state_scope, + policy_rules, + session_id, + function_call_id, + &function_id, + ) + .await + } else { + 0 + }; + + if cascaded > 0 { + json!({ "ok": true, "cascaded": cascaded }) + } else { + json!({ "ok": true }) + } + } + } +} + +/// Push an Allow rule for `function_id` into the shared policy ruleset, +/// then resolve every pending record in `session_id` (other than the one +/// just resolved by the caller) that the new rule covers. Returns the +/// number of records auto-resolved. 
+/// +/// The function id rule is appended once; if the user clicks "always +/// allow X" twice for the same X within a session, the second push is a +/// duplicate but harmless (last-wins still picks Allow). State-write +/// failures inside the loop are logged and skipped so a single bad +/// record can't prevent the rest of the cascade. +async fn cascade_allow_for_session( + bus: &dyn StateBus, + exec: &dyn FunctionExecutor, + state_scope: &str, + policy_rules: &RwLock, + session_id: &str, + originator_call_id: &str, + originator_function_id: &str, +) -> u64 { + // Push the new Allow rule under the write lock. Hold the guard only + // for the mutation, not across the .await in the sweep below. + { + let mut guard = policy_rules + .write() + .expect("approval-gate policy rules lock poisoned"); + guard.push(rules::Rule { + permission: originator_function_id.to_string(), + pattern: "*".to_string(), + action: rules::Action::Allow, + }); + } + + // Snapshot the session's pending records and re-evaluate each one + // against the now-updated rules. Use a read-clone so we don't hold + // the lock across .await. 
+ let prefix = format!("{session_id}/"); + let session_records = bus.list_prefix(state_scope, &prefix).await; + let mut cascaded = 0u64; + for rec in session_records { + let rec_call_id = match rec.get("function_call_id").and_then(Value::as_str) { + Some(s) => s.to_string(), + None => continue, + }; + if rec_call_id == originator_call_id { + continue; + } + if rec.get("status").and_then(Value::as_str) != Some("pending") { + continue; + } + let fn_id = rec + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let outcome = { + let guard = policy_rules + .read() + .expect("approval-gate policy rules lock poisoned"); + apply_policy_rules(&guard, &fn_id) + }; + if !matches!(outcome, PolicyOutcome::Allow) { + continue; + } + if let Err(err) = + approve_and_execute(bus, exec, state_scope, &rec, session_id, &rec_call_id).await + { + tracing::warn!( + session_id, + call_id = %rec_call_id, + "approval-gate: cascade auto-resolve failed: {err}" + ); + continue; + } + cascaded += 1; + } + cascaded +} + +/// Drive a pending record through the approved → invoke → executed/failed +/// flow. Pure plumbing — does not consult policy rules, does not check +/// the original status (caller must have verified it's pending). Used by +/// both the user-driven [`handle_resolve`] allow path and the +/// cascade-on-`always` sweep so the state transitions stay in one place. +/// +/// Returns `Err` only when a state write fails; the invocation result +/// itself (success or function-error) is captured on the record. The +/// caller decides how to surface a state-write failure (the existing +/// handlers map it to `{ok:false, error:"state_write_failed"}`). 
+pub(crate) async fn approve_and_execute( + bus: &dyn StateBus, + exec: &dyn FunctionExecutor, + state_scope: &str, + pending: &Value, + session_id: &str, + function_call_id: &str, +) -> Result<(), String> { + let function_id = pending + .get("function_id") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let args = pending.get("args").cloned().unwrap_or(json!({})); + let key = pending_key(session_id, function_call_id); + let approved = transition_record(pending, "approved", None, None, None); + // Best-effort intermediate write; if it fails we still try to invoke + // so the user-visible behavior matches the pre-extraction allow path. + let _ = bus.set(state_scope, &key, approved.clone()).await; + match exec + .invoke(&function_id, args, function_call_id, session_id) + .await + { + Ok(result) => { + let executed = transition_record(&approved, "executed", Some(result), None, None); + bus.set(state_scope, &key, executed) + .await + .map_err(|e| e.to_string()) + } + Err(error) => { + let failed = transition_record(&approved, "failed", None, Some(error), None); + bus.set(state_scope, &key, failed) + .await + .map_err(|e| e.to_string()) + } + } +} From c8e1cfb908d3b79f328f014730ff50695fcc0a0f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 20:21:05 -0300 Subject: [PATCH 10/30] refactor(approval-gate): extract delivery.rs from lib.rs Sixth step of breaking lib.rs into focused modules. This commit lifts the delivery-tracking RPCs into their own module. Moved (lib.rs -> delivery.rs): - handle_list_pending() - handle_list_undelivered() - handle_ack_delivered() - handle_consume_undelivered() - handle_flush_delivered() - handle_sweep_session() - LIST_UNDELIVERED_DEFAULT_LIMIT All public items stay re-exported from the crate root. Pure move, no behavior change. 141 approval-gate tests pass. 
--- approval-gate/src/delivery.rs | 309 ++++++++++++++++++++++++++++++++++ approval-gate/src/lib.rs | 290 +------------------------------ 2 files changed, 314 insertions(+), 285 deletions(-) create mode 100644 approval-gate/src/delivery.rs diff --git a/approval-gate/src/delivery.rs b/approval-gate/src/delivery.rs new file mode 100644 index 00000000..b6ed055c --- /dev/null +++ b/approval-gate/src/delivery.rs @@ -0,0 +1,309 @@ +//! Delivery-tracking handlers. +//! +//! The six RPCs that orchestrators call to read and acknowledge the +//! gate's terminal-status records, plus the sweep that retires pending +//! ones when a session ends. They share two invariants: +//! +//! - Stamping `delivered_in_turn_id` is idempotent — re-acking a record +//! that already has the stamp is a no-op. +//! - Lazy timeout flip: any read path through this module promotes +//! pending-but-expired records to `timed_out` before applying its +//! filter, so callers see expired calls surface on the same read they +//! would have used regardless. + +use serde_json::{json, Value}; + +use crate::lifecycle::{is_terminal_status, maybe_flip_timed_out, transition_record}; +use crate::state::StateBus; +use crate::wire::pending_key; + +/// List records currently in the `pending` status for a session. Used +/// by UIs to render the in-flight approval queue. +pub async fn handle_list_pending(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + if session_id.is_empty() { + return json!({ "pending": [] }); + } + let prefix = format!("{session_id}/"); + let all = bus.list_prefix(state_scope, &prefix).await; + let pending: Vec = all + .into_iter() + .filter(|v| v.get("status").and_then(Value::as_str) == Some("pending")) + .collect(); + json!({ "pending": pending }) +} + +/// Default cap for `handle_list_undelivered` responses. 
A single LLM turn +/// should never be asked to ingest more than this many stitched approval +/// messages; older entries beyond the cap stay unacked and are reported via +/// the `omitted` counter so the caller can render a summary line. +pub const LIST_UNDELIVERED_DEFAULT_LIMIT: usize = 50; + +/// Return terminal-status records for a session that haven't been stamped +/// with `delivered_in_turn_id`. Lazy timeout: pending records past +/// `expires_at` (as observed at `now_ms`) are flipped to `timed_out` before +/// the filter so they surface here in the same call. +/// +/// Sorted oldest-first by `resolved_at` (records missing `resolved_at` sort +/// last as `u64::MAX`). Capped at `limit` (default +/// [`LIST_UNDELIVERED_DEFAULT_LIMIT`]); the response always includes an +/// `omitted` field counting entries left behind. +pub async fn handle_list_undelivered( + bus: &dyn StateBus, + state_scope: &str, + payload: Value, + now_ms: u64, +) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + if session_id.is_empty() { + return json!({ "entries": [], "omitted": 0 }); + } + let limit = payload + .get("limit") + .and_then(Value::as_u64) + .map(|n| n as usize) + .unwrap_or(LIST_UNDELIVERED_DEFAULT_LIMIT); + let prefix = format!("{session_id}/"); + let all = bus.list_prefix(state_scope, &prefix).await; + let mut entries: Vec = Vec::new(); + for rec in all { + // Defensive scope: some bus backends ignore the prefix and return + // every record in `state_scope`. Drop anything not stamped with + // the session_id we're listing for. Orphan records lacking a + // session_id stamp are dropped (cannot be attributed); the + // migration path that used to recover them no longer exists. 
+ match rec.get("session_id").and_then(Value::as_str) { + Some(sid) if sid == session_id => {} + _ => continue, + } + let rec = if let Some(flipped) = maybe_flip_timed_out(&rec, now_ms) { + let call_id = flipped + .get("function_call_id") + .and_then(Value::as_str) + .unwrap_or(""); + let _ = bus + .set( + state_scope, + &pending_key(session_id, call_id), + flipped.clone(), + ) + .await; + flipped + } else { + rec + }; + let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); + if !is_terminal_status(status) { + continue; + } + if rec + .get("delivered_in_turn_id") + .is_some_and(|v| !v.is_null()) + { + continue; + } + entries.push(rec); + } + entries.sort_by_key(|e| { + e.get("resolved_at") + .and_then(Value::as_u64) + .unwrap_or(u64::MAX) + }); + let total = entries.len(); + let omitted = total.saturating_sub(limit); + entries.truncate(limit); + json!({ "entries": entries, "omitted": omitted }) +} + +/// Stamp `delivered_in_turn_id` on terminal-status records named in +/// `call_ids` for the given session. Idempotent: records already stamped +/// (non-null `delivered_in_turn_id`) are not overwritten. Unknown call ids +/// are silently skipped. 
+pub async fn handle_ack_delivered(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); + let call_ids: Vec = payload + .get("call_ids") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(str::to_string)) + .collect() + }) + .unwrap_or_default(); + if session_id.is_empty() || turn_id.is_empty() || call_ids.is_empty() { + return json!({ "ok": true, "stamped": 0 }); + } + let mut stamped = 0_u64; + for cid in call_ids { + let key = pending_key(session_id, &cid); + let Some(rec) = bus.get(state_scope, &key).await else { + continue; + }; + if rec + .get("delivered_in_turn_id") + .is_some_and(|v| !v.is_null()) + { + continue; + } + let mut next = rec; + next.as_object_mut().unwrap().insert( + "delivered_in_turn_id".into(), + Value::String(turn_id.to_string()), + ); + if bus.set(state_scope, &key, next).await.is_ok() { + stamped += 1; + } + } + json!({ "ok": true, "stamped": stamped }) +} + +/// Atomic list+ack: returns the same entries `handle_list_undelivered` would +/// surface (subject to the same FIFO+cap rules) and stamps each one with +/// `delivered_in_turn_id` before returning. Eliminates the list→LLM→ack +/// race window: if the caller crashes after receiving the response, the +/// entries are still considered delivered and will not resurface, which is +/// acceptable because terminal records are informational (the side-effect +/// already executed inside the gate). +/// +/// Required payload: `{ session_id, turn_id, limit? }`. 
+pub async fn handle_consume_undelivered( + bus: &dyn StateBus, + state_scope: &str, + payload: Value, + now_ms: u64, +) -> Value { + let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); + if turn_id.is_empty() { + return json!({ "ok": false, "error": "missing_turn_id", "entries": [], "omitted": 0 }); + } + let listed = handle_list_undelivered(bus, state_scope, payload.clone(), now_ms).await; + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let entries = listed["entries"].as_array().cloned().unwrap_or_default(); + let omitted = listed["omitted"].as_u64().unwrap_or(0); + for rec in &entries { + let cid = rec + .get("function_call_id") + .and_then(Value::as_str) + .unwrap_or(""); + if cid.is_empty() { + continue; + } + let key = pending_key(session_id, cid); + let mut stamped = rec.clone(); + stamped.as_object_mut().unwrap().insert( + "delivered_in_turn_id".into(), + Value::String(turn_id.to_string()), + ); + let _ = bus.set(state_scope, &key, stamped).await; + } + json!({ "ok": true, "entries": entries, "omitted": omitted }) +} + +/// One-shot drain: stamp every terminal-status record in `session_id` that +/// lacks `delivered_in_turn_id`. Intended for operator recovery after a +/// large backlog accumulates (e.g. when the orchestrator was offline or +/// `consume_undelivered` was unreachable). Pending records are untouched — +/// use `sweep_session` if you want to expire them first. 
+pub async fn handle_flush_delivered( + bus: &dyn StateBus, + state_scope: &str, + payload: Value, +) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); + if session_id.is_empty() || turn_id.is_empty() { + return json!({ "ok": false, "error": "missing_session_or_turn_id", "stamped": 0 }); + } + let prefix = format!("{session_id}/"); + let all = bus.list_prefix(state_scope, &prefix).await; + let mut stamped = 0_u64; + for rec in all { + let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); + if !is_terminal_status(status) { + continue; + } + if rec + .get("delivered_in_turn_id") + .is_some_and(|v| !v.is_null()) + { + continue; + } + let cid = rec + .get("function_call_id") + .and_then(Value::as_str) + .map(str::to_string) + .unwrap_or_default(); + if cid.is_empty() { + continue; + } + let mut next = rec; + next.as_object_mut().unwrap().insert( + "delivered_in_turn_id".into(), + Value::String(turn_id.to_string()), + ); + if bus + .set(state_scope, &pending_key(session_id, &cid), next) + .await + .is_ok() + { + stamped += 1; + } + } + json!({ "ok": true, "stamped": stamped }) +} + +/// Sweep all still-pending approvals for a session to timed_out. +/// +/// The `timed_out` status is self-describing per the Denial refactor — +/// callers no longer pass (or get back) a reason string. If you need to +/// distinguish *why* a session was swept (delete vs. abort vs. timeout), +/// the calling worker already has that context and should log it there. 
+pub async fn handle_sweep_session(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + if session_id.is_empty() { + return json!({ "ok": false, "error": "missing_session_id", "swept": 0 }); + } + let prefix = format!("{session_id}/"); + let all = bus.list_prefix(state_scope, &prefix).await; + let mut swept = 0_u64; + for rec in all { + if rec.get("status").and_then(Value::as_str) != Some("pending") { + continue; + } + let call_id = rec + .get("function_call_id") + .and_then(Value::as_str) + .unwrap_or(""); + if call_id.is_empty() { + continue; + } + let flipped = transition_record(&rec, "timed_out", None, None, None); + if bus + .set(state_scope, &pending_key(session_id, call_id), flipped) + .await + .is_ok() + { + swept += 1; + } + } + json!({ "ok": true, "swept": swept }) +} diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index cfba72a6..fcee94ab 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -3,6 +3,7 @@ //! waiting for the UI to call `approval::resolve` (or for a timeout). 
pub mod config; +pub mod delivery; pub mod intercept; pub mod lifecycle; pub mod manifest; @@ -13,6 +14,10 @@ pub mod state; pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; +pub use delivery::{ + handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, + handle_list_undelivered, handle_sweep_session, LIST_UNDELIVERED_DEFAULT_LIMIT, +}; pub use intercept::handle_intercept; pub use lifecycle::{ build_pending_record, collect_timed_out_for_sweep, is_terminal_status, maybe_flip_timed_out, @@ -68,291 +73,6 @@ pub struct Refs { } -pub async fn handle_list_pending(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() { - return json!({ "pending": [] }); - } - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let pending: Vec = all - .into_iter() - .filter(|v| v.get("status").and_then(Value::as_str) == Some("pending")) - .collect(); - json!({ "pending": pending }) -} - -/// Default cap for `handle_list_undelivered` responses. A single LLM turn -/// should never be asked to ingest more than this many stitched approval -/// messages; older entries beyond the cap stay unacked and are reported via -/// the `omitted` counter so the caller can render a summary line. -pub const LIST_UNDELIVERED_DEFAULT_LIMIT: usize = 50; - -/// Return terminal-status records for a session that haven't been stamped -/// with `delivered_in_turn_id`. Lazy timeout: pending records past -/// `expires_at` (as observed at `now_ms`) are flipped to `timed_out` before -/// the filter so they surface here in the same call. -/// -/// Sorted oldest-first by `resolved_at` (records missing `resolved_at` sort -/// last as `u64::MAX`). Capped at `limit` (default -/// [`LIST_UNDELIVERED_DEFAULT_LIMIT`]); the response always includes an -/// `omitted` field counting entries left behind. 
-pub async fn handle_list_undelivered( - bus: &dyn StateBus, - state_scope: &str, - payload: Value, - now_ms: u64, -) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() { - return json!({ "entries": [], "omitted": 0 }); - } - let limit = payload - .get("limit") - .and_then(Value::as_u64) - .map(|n| n as usize) - .unwrap_or(LIST_UNDELIVERED_DEFAULT_LIMIT); - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut entries: Vec = Vec::new(); - for rec in all { - // Defensive scope: some bus backends ignore the prefix and return - // every record in `state_scope`. Drop anything not stamped with - // the session_id we're listing for. Orphan records lacking a - // session_id stamp are dropped (cannot be attributed); the - // migration path that used to recover them no longer exists. - match rec.get("session_id").and_then(Value::as_str) { - Some(sid) if sid == session_id => {} - _ => continue, - } - let rec = if let Some(flipped) = maybe_flip_timed_out(&rec, now_ms) { - let call_id = flipped - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - let _ = bus - .set( - state_scope, - &pending_key(session_id, call_id), - flipped.clone(), - ) - .await; - flipped - } else { - rec - }; - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - if !is_terminal_status(status) { - continue; - } - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; - } - entries.push(rec); - } - entries.sort_by_key(|e| { - e.get("resolved_at") - .and_then(Value::as_u64) - .unwrap_or(u64::MAX) - }); - let total = entries.len(); - let omitted = total.saturating_sub(limit); - entries.truncate(limit); - json!({ "entries": entries, "omitted": omitted }) -} - -/// Stamp `delivered_in_turn_id` on terminal-status records named in -/// `call_ids` for the given session. 
Idempotent: records already stamped -/// (non-null `delivered_in_turn_id`) are not overwritten. Unknown call ids -/// are silently skipped. -pub async fn handle_ack_delivered(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - let call_ids: Vec = payload - .get("call_ids") - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(str::to_string)) - .collect() - }) - .unwrap_or_default(); - if session_id.is_empty() || turn_id.is_empty() || call_ids.is_empty() { - return json!({ "ok": true, "stamped": 0 }); - } - let mut stamped = 0_u64; - for cid in call_ids { - let key = pending_key(session_id, &cid); - let Some(rec) = bus.get(state_scope, &key).await else { - continue; - }; - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; - } - let mut next = rec; - next.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - if bus.set(state_scope, &key, next).await.is_ok() { - stamped += 1; - } - } - json!({ "ok": true, "stamped": stamped }) -} - -/// Atomic list+ack: returns the same entries `handle_list_undelivered` would -/// surface (subject to the same FIFO+cap rules) and stamps each one with -/// `delivered_in_turn_id` before returning. Eliminates the list→LLM→ack -/// race window: if the caller crashes after receiving the response, the -/// entries are still considered delivered and will not resurface, which is -/// acceptable because terminal records are informational (the side-effect -/// already executed inside the gate). -/// -/// Required payload: `{ session_id, turn_id, limit? }`. 
-pub async fn handle_consume_undelivered( - bus: &dyn StateBus, - state_scope: &str, - payload: Value, - now_ms: u64, -) -> Value { - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - if turn_id.is_empty() { - return json!({ "ok": false, "error": "missing_turn_id", "entries": [], "omitted": 0 }); - } - let listed = handle_list_undelivered(bus, state_scope, payload.clone(), now_ms).await; - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let entries = listed["entries"].as_array().cloned().unwrap_or_default(); - let omitted = listed["omitted"].as_u64().unwrap_or(0); - for rec in &entries { - let cid = rec - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if cid.is_empty() { - continue; - } - let key = pending_key(session_id, cid); - let mut stamped = rec.clone(); - stamped.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - let _ = bus.set(state_scope, &key, stamped).await; - } - json!({ "ok": true, "entries": entries, "omitted": omitted }) -} - -/// One-shot drain: stamp every terminal-status record in `session_id` that -/// lacks `delivered_in_turn_id`. Intended for operator recovery after a -/// large backlog accumulates (e.g. when the orchestrator was offline or -/// `consume_undelivered` was unreachable). Pending records are untouched — -/// use `sweep_session` if you want to expire them first. 
-pub async fn handle_flush_delivered(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - if session_id.is_empty() || turn_id.is_empty() { - return json!({ "ok": false, "error": "missing_session_or_turn_id", "stamped": 0 }); - } - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut stamped = 0_u64; - for rec in all { - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - if !is_terminal_status(status) { - continue; - } - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; - } - let cid = rec - .get("function_call_id") - .and_then(Value::as_str) - .map(str::to_string) - .unwrap_or_default(); - if cid.is_empty() { - continue; - } - let mut next = rec; - next.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - if bus - .set(state_scope, &pending_key(session_id, &cid), next) - .await - .is_ok() - { - stamped += 1; - } - } - json!({ "ok": true, "stamped": stamped }) -} - -/// Sweep all still-pending approvals for a session to timed_out. -/// -/// The `timed_out` status is self-describing per the Denial refactor — -/// callers no longer pass (or get back) a reason string. If you need to -/// distinguish *why* a session was swept (delete vs. abort vs. timeout), -/// the calling worker already has that context and should log it there. 
-pub async fn handle_sweep_session(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() { - return json!({ "ok": false, "error": "missing_session_id", "swept": 0 }); - } - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut swept = 0_u64; - for rec in all { - if rec.get("status").and_then(Value::as_str) != Some("pending") { - continue; - } - let call_id = rec - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if call_id.is_empty() { - continue; - } - let flipped = transition_record(&rec, "timed_out", None, None, None); - if bus - .set(state_scope, &pending_key(session_id, call_id), flipped) - .await - .is_ok() - { - swept += 1; - } - } - json!({ "ok": true, "swept": swept }) -} - fn uuid_like() -> String { // Lightweight unique-ish id without pulling uuid in: ns timestamp + counter. use std::sync::atomic::{AtomicU64, Ordering}; From 29f4c955a889818d173d1d6c829bc93a31c019d1 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 20:22:31 -0300 Subject: [PATCH 11/30] refactor(approval-gate): extract sweeper.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seventh step of breaking lib.rs into focused modules. This commit lifts the periodic timeout sweeper and the small iii-stream helpers it owns into their own module. Moved (lib.rs -> sweeper.rs): - spawn_timeout_sweeper() - timeout_resolved_event() - write_event() - write_hook_reply() - uuid_like() All five stay pub(crate) — only the register() wiring in lib.rs and the resolve flow need them. Pure move, no behavior change. 141 approval-gate tests pass. 
--- approval-gate/src/lib.rs | 107 +--------------------------- approval-gate/src/sweeper.rs | 133 +++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 104 deletions(-) create mode 100644 approval-gate/src/sweeper.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index fcee94ab..b249aa9e 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -11,8 +11,11 @@ pub mod record; pub mod resolve; pub mod rules; pub mod state; +pub mod sweeper; pub mod wire; +use sweeper::{spawn_timeout_sweeper, timeout_resolved_event, write_event, write_hook_reply}; + pub use config::{InterceptorRule, WorkerConfig}; pub use delivery::{ handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, @@ -73,110 +76,6 @@ pub struct Refs { } -fn uuid_like() -> String { - // Lightweight unique-ish id without pulling uuid in: ns timestamp + counter. - use std::sync::atomic::{AtomicU64, Ordering}; - static C: AtomicU64 = AtomicU64::new(0); - let n = C.fetch_add(1, Ordering::Relaxed); - let t = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - format!("{t:x}-{n:x}") -} - -async fn write_event(iii: &III, session_id: &str, event: &Value) { - let _ = iii - .trigger(TriggerRequest { - function_id: "stream::set".into(), - payload: json!({ - "stream_name": "agent::events", - "group_id": session_id, - "item_id": format!("approval-{}", uuid_like()), - "data": event, - }), - action: None, - timeout_ms: None, - }) - .await; -} - -/// Build the `approval_resolved` event a sweeper emits when it auto-flips an -/// expired pending record. Pure — caller pumps the result onto the stream. -fn timeout_resolved_event(function_call_id: &str) -> Value { - // Timed-out approvals carry no Denial — the `status: "timed_out"` is - // self-describing per the Denial refactor. Consumers (turn-orchestrator - // stitching, UIs) render the timeout from the status alone. 
- json!({ - "type": "approval_resolved", - "function_call_id": function_call_id, - "tool_call_id": function_call_id, - "decision": "deny", - "status": "timed_out", - }) -} - -/// Spawn the periodic timeout sweeper. The task ticks every `interval_ms`, -/// scans the configured state scope, and for any pending record whose -/// `expires_at` is in the past: writes the flipped record back and emits an -/// `approval_resolved` (status=timed_out) frame on `agent::events/`. -/// -/// The previous design relied on lazy timeout flips during -/// `handle_resolve`/`handle_list_undelivered`. Operators who never opened the -/// UI for a session would leave its pending rows in `pending` forever and -/// the paused turn-orchestrator would never see a decision. Active sweeping -/// closes that hole. -fn spawn_timeout_sweeper( - iii: III, - bus: Arc, - state_scope: String, - interval_ms: u64, -) -> tokio::task::JoinHandle<()> { - tokio::spawn(async move { - let mut ticker = - tokio::time::interval(std::time::Duration::from_millis(interval_ms.max(50))); - ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); - // Drop the immediate first tick so we don't sweep before any - // pending row could possibly exist. 
- ticker.tick().await; - loop { - ticker.tick().await; - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - let all = bus.list_prefix(&state_scope, "").await; - for (key, flipped, session_id, call_id) in collect_timed_out_for_sweep(&all, now_ms) { - if let Err(err) = bus.set(&state_scope, &key, flipped).await { - tracing::warn!( - "approval-gate sweeper: failed to flip {key} → timed_out: {err}" - ); - continue; - } - write_event(&iii, &session_id, &timeout_resolved_event(&call_id)).await; - } - } - }) -} - -async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &str, reply: &Value) { - if stream_name.is_empty() || event_id.is_empty() { - return; - } - let _ = iii - .trigger(TriggerRequest { - function_id: "stream::set".into(), - payload: json!({ - "stream_name": stream_name, - "group_id": event_id, - "item_id": uuid_like(), - "data": reply, - }), - action: None, - timeout_ms: None, - }) - .await; -} pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let rules: Arc> = Arc::new(cfg.interceptors.clone()); diff --git a/approval-gate/src/sweeper.rs b/approval-gate/src/sweeper.rs new file mode 100644 index 00000000..660ec219 --- /dev/null +++ b/approval-gate/src/sweeper.rs @@ -0,0 +1,133 @@ +//! Periodic timeout sweeper + stream-event helpers. +//! +//! The sweeper runs as a background task: every `interval_ms` it scans +//! the configured state scope, promotes any pending record past its +//! `expires_at` to `timed_out`, and emits the resulting +//! `approval_resolved` event on `agent::events/<session_id>` so the +//! orchestrator sees the timeout without having to poll. +//! +//! [`write_event`] and [`write_hook_reply`] are the two iii stream +//! writes the gate makes; they live here because the sweeper is their +//! primary caller (the resolve flow also uses them, but their shape is +//! tied to the events-stream contract that the sweeper owns). 
+ +use std::sync::Arc; + +use iii_sdk::{TriggerRequest, III}; +use serde_json::{json, Value}; + +use crate::lifecycle::collect_timed_out_for_sweep; +use crate::state::StateBus; + +/// Lightweight unique-ish id without pulling uuid in: ns timestamp + counter. +/// Used as the `item_id` for stream writes so two appends from the same +/// process don't collide. +pub(crate) fn uuid_like() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; + static C: AtomicU64 = AtomicU64::new(0); + let n = C.fetch_add(1, Ordering::Relaxed); + let t = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + format!("{t:x}-{n:x}") +} + +/// Append `event` to the `agent::events` stream for `session_id`. Used by +/// the sweeper (timeout flips) and by the resolve closure (post-resolve +/// `approval_resolved` frame). Fire-and-forget: errors are swallowed +/// because the persisted record is the source of truth — orchestrators +/// re-derive state from `approval::list_undelivered` if a frame is lost. +pub(crate) async fn write_event(iii: &III, session_id: &str, event: &Value) { + let _ = iii + .trigger(TriggerRequest { + function_id: "stream::set".into(), + payload: json!({ + "stream_name": "agent::events", + "group_id": session_id, + "item_id": format!("approval-{}", uuid_like()), + "data": event, + }), + action: None, + timeout_ms: None, + }) + .await; +} + +/// Build the `approval_resolved` event a sweeper emits when it auto-flips an +/// expired pending record. Pure — caller pumps the result onto the stream. +pub(crate) fn timeout_resolved_event(function_call_id: &str) -> Value { + // Timed-out approvals carry no Denial — the `status: "timed_out"` is + // self-describing per the Denial refactor. Consumers (turn-orchestrator + // stitching, UIs) render the timeout from the status alone. 
+ json!({ + "type": "approval_resolved", + "function_call_id": function_call_id, + "tool_call_id": function_call_id, + "decision": "deny", + "status": "timed_out", + }) +} + +/// Spawn the periodic timeout sweeper. The task ticks every `interval_ms`, +/// scans the configured state scope, and for any pending record whose +/// `expires_at` is in the past: writes the flipped record back and emits an +/// `approval_resolved` (status=timed_out) frame on `agent::events/`. +/// +/// Active sweeping closes the gap left by lazy flips: operators who never +/// open the UI for a session would otherwise leave its pending rows in +/// `pending` forever and the paused orchestrator would never see a +/// decision. +pub(crate) fn spawn_timeout_sweeper( + iii: III, + bus: Arc, + state_scope: String, + interval_ms: u64, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + let mut ticker = + tokio::time::interval(std::time::Duration::from_millis(interval_ms.max(50))); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + // Drop the immediate first tick so we don't sweep before any + // pending row could possibly exist. + ticker.tick().await; + loop { + ticker.tick().await; + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let all = bus.list_prefix(&state_scope, "").await; + for (key, flipped, session_id, call_id) in collect_timed_out_for_sweep(&all, now_ms) { + if let Err(err) = bus.set(&state_scope, &key, flipped).await { + tracing::warn!( + "approval-gate sweeper: failed to flip {key} → timed_out: {err}" + ); + continue; + } + write_event(&iii, &session_id, &timeout_resolved_event(&call_id)).await; + } + } + }) +} + +/// Append a hook reply onto `stream_name` keyed by `event_id`. No-op when +/// either id is empty so a malformed envelope can't crash the gate. 
+pub(crate) async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &str, reply: &Value) { + if stream_name.is_empty() || event_id.is_empty() { + return; + } + let _ = iii + .trigger(TriggerRequest { + function_id: "stream::set".into(), + payload: json!({ + "stream_name": stream_name, + "group_id": event_id, + "item_id": uuid_like(), + "data": reply, + }), + action: None, + timeout_ms: None, + }) + .await; +} From b6e34026e392b77a12f2823302a3e92cea134532 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 20:28:46 -0300 Subject: [PATCH 12/30] refactor(approval-gate): extract register.rs from lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final step of breaking lib.rs into focused modules. This commit lifts the iii function/trigger wiring — the heart of the worker startup path — into its own module. Moved (lib.rs -> register.rs): - register() (~420 lines: the subscriber closure + 8 function registrations + trigger registration + sweeper spawn) - Refs struct (the handle bag returned to the binary) - FN_RESOLVE / FN_LIST_PENDING / FN_LIST_UNDELIVERED / FN_CONSUME_UNDELIVERED / FN_ACK_DELIVERED / FN_FLUSH_DELIVERED / FN_SWEEP_SESSION / FN_LOOKUP_RECORD constants - STATE_SCOPE constant lib.rs is now ~50 lines of module declarations + re-exports + ~2580 lines of inline tests. The test mod uses super::* plus local serde_json and std::sync imports. All public items stay re-exported from the crate root so the binary's `use approval_gate::register;` keeps working unchanged. Pure move, no behavior change. 141 approval-gate tests pass; turn-orchestrator + harness + shell verified green (turn-orchestrator's pre-existing dual_write flake notwithstanding). 
--- approval-gate/src/lib.rs | 479 ++------------------------------- approval-gate/src/register.rs | 489 ++++++++++++++++++++++++++++++++++ 2 files changed, 505 insertions(+), 463 deletions(-) create mode 100644 approval-gate/src/register.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index b249aa9e..3630eb59 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -8,14 +8,13 @@ pub mod intercept; pub mod lifecycle; pub mod manifest; pub mod record; +pub mod register; pub mod resolve; pub mod rules; pub mod state; pub mod sweeper; pub mod wire; -use sweeper::{spawn_timeout_sweeper, timeout_resolved_event, write_event, write_hook_reply}; - pub use config::{InterceptorRule, WorkerConfig}; pub use delivery::{ handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, @@ -27,6 +26,10 @@ pub use lifecycle::{ transition_record, transition_record_with_now, }; pub use record::{Next, Record, Status}; +pub use register::{ + register, Refs, FN_ACK_DELIVERED, FN_CONSUME_UNDELIVERED, FN_FLUSH_DELIVERED, FN_LIST_PENDING, + FN_LIST_UNDELIVERED, FN_LOOKUP_RECORD, FN_RESOLVE, FN_SWEEP_SESSION, STATE_SCOPE, +}; pub use resolve::{handle_lookup_record, handle_resolve}; pub use state::{ unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, @@ -35,473 +38,23 @@ pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; -use intercept::{decide_intercept_action, interpret_classifier_reply, ClassifierDecision, InterceptAction, PolicyOutcome, apply_policy_rules}; -use state::rule_for; +// Test-only re-imports so the inline `mod tests` below keeps working +// without an unreasonable churn pass over its assertions. 
#[cfg(test)] -use state::merge_from_approval_marker_if_needed; - -use std::sync::{Arc, RwLock}; - -use iii_sdk::{ - FunctionRef, IIIError, RegisterFunctionMessage, RegisterTriggerInput, TriggerRequest, III, +use intercept::{ + apply_policy_rules, decide_intercept_action, interpret_classifier_reply, ClassifierDecision, + InterceptAction, PolicyOutcome, }; -use serde_json::{json, Value}; - -pub const FN_RESOLVE: &str = "approval::resolve"; -pub const FN_LIST_PENDING: &str = "approval::list_pending"; -pub const FN_LIST_UNDELIVERED: &str = "approval::list_undelivered"; -pub const FN_CONSUME_UNDELIVERED: &str = "approval::consume_undelivered"; -pub const FN_ACK_DELIVERED: &str = "approval::ack_delivered"; -pub const FN_FLUSH_DELIVERED: &str = "approval::flush_delivered"; -pub const FN_SWEEP_SESSION: &str = "approval::sweep_session"; -pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; -/// Default `approval_state_scope` (matches [`WorkerConfig::default`]). -pub const STATE_SCOPE: &str = "approvals"; - -pub struct Refs { - pub resolve: FunctionRef, - pub list_pending: FunctionRef, - pub list_undelivered: FunctionRef, - pub consume_undelivered: FunctionRef, - pub ack_delivered: FunctionRef, - pub flush_delivered: FunctionRef, - pub sweep_session: FunctionRef, - pub lookup_record: FunctionRef, - pub subscriber_fn: FunctionRef, - pub subscriber_trigger: iii_sdk::Trigger, - /// Background task that flips expired pending records to `timed_out` and - /// emits the corresponding `approval_resolved` events. Kept alive by - /// virtue of being held here; aborts when the worker shuts down. - pub sweeper: tokio::task::JoinHandle<()>, -} - - - -pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { - let rules: Arc> = Arc::new(cfg.interceptors.clone()); - // Layered policy rules consulted before the per-function interceptor - // flow. 
Wrapped in RwLock so a user reply with `always: true` on - // `approval::resolve` can push a new Allow rule at runtime (see the - // cascade in `handle_resolve`). See [`crate::rules`]. - let policy_rules: Arc> = - Arc::new(RwLock::new(cfg.rules.clone())); - - // Fail fast on honor-system markers: any interceptor that asks the gate - // to inject `__from_approval` MUST also assert the target validates it. - // Without that assertion the marker is purely decorative and the gate - // has no way to know whether bypass-through-direct-trigger is contained. - let unverified = unverified_marker_targets(rules.as_slice()); - if !unverified.is_empty() { - return Err(anyhow::anyhow!( - "approval-gate: refusing to start — interceptors with inject_approval_marker=true \ - must also set marker_target_verified=true (target is asserted to validate \ - __from_approval against approval::lookup_record). Unverified: {unverified:?}" - )); - } - - for rule in rules.iter() { - if let Some(cid) = rule.classifier.as_deref() { - if cid == FN_LOOKUP_RECORD - || cid == FN_RESOLVE - || cid == FN_LIST_PENDING - || cid == FN_LIST_UNDELIVERED - || cid == FN_ACK_DELIVERED - || cid == FN_SWEEP_SESSION - { - tracing::warn!( - "approval-gate: interceptor for {:?} uses classifier {:?} which aliases an approval endpoint; fix config", - rule.function_id, - cid - ); - } - } - } - - let bus: Arc = Arc::new(IiiStateBus(iii.clone())); - let timeout_ms = cfg.default_timeout_ms; - let topic = cfg.topic.clone(); - let state_scope = cfg.approval_state_scope.clone(); - - let bus_for_resolve = bus.clone(); - let scope_resolve = state_scope.clone(); - let exec_for_resolve: Arc = Arc::new(IiiFunctionExecutor { - iii: iii.clone(), - rules: rules.clone(), - }); - let iii_for_resolve = iii.clone(); - let policy_rules_for_resolve = policy_rules.clone(); - let resolve = iii.register_function(( - RegisterFunctionMessage::with_id(FN_RESOLVE.into()).with_description( - "Resolve a pending approval. 
On allow, invokes the underlying function; \ - on deny, records the denial. With `always: true` on an allow reply, \ - a runtime rule is added so future calls to this function id auto-allow, \ - and the session's other pending calls newly matching are cascade-resolved. \ - The result is stitched into the agent's next turn as a system message." - .into(), - ), - move |payload: Value| { - let bus = bus_for_resolve.clone(); - let exec = exec_for_resolve.clone(); - let scope_resolve = scope_resolve.clone(); - let iii = iii_for_resolve.clone(); - let policy_rules = policy_rules_for_resolve.clone(); - async move { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - let resp = handle_resolve( - bus.as_ref(), - exec.as_ref(), - &scope_resolve, - &policy_rules, - payload.clone(), - now_ms, - ) - .await; - - if resp.get("ok").and_then(Value::as_bool) == Some(true) { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let call_id = payload - .get("function_call_id") - .or_else(|| payload.get("tool_call_id")) - .and_then(Value::as_str) - .unwrap_or(""); - if !session_id.is_empty() && !call_id.is_empty() { - let key = pending_key(session_id, call_id); - if let Some(final_rec) = bus.get(&scope_resolve, &key).await { - let mut evt = json!({ - "type": "approval_resolved", - "function_call_id": call_id, - "tool_call_id": call_id, - }); - if let Some(status) = final_rec.get("status").and_then(Value::as_str) { - evt["decision"] = match status { - "executed" | "approved" => json!("allow"), - _ => json!("deny"), - }; - evt["status"] = json!(status); - } - if let Some(r) = final_rec.get("result") { - evt["result"] = json!(r); - } - if let Some(e) = final_rec.get("error") { - evt["error"] = json!(e); - } - if let Some(denial) = final_rec.get("denial") { - evt["denial"] = denial.clone(); - } - write_event(&iii, session_id, &evt).await; - } - } - } - Ok::<_, 
IIIError>(resp) - } - }, - )); - - let bus_for_list = bus.clone(); - let scope_list = state_scope.clone(); - let list_pending = iii.register_function(( - RegisterFunctionMessage::with_id(FN_LIST_PENDING.into()) - .with_description("Return pending approvals for a session.".into()), - move |payload: Value| { - let bus = bus_for_list.clone(); - let scope_list = scope_list.clone(); - async move { - Ok::<_, IIIError>(handle_list_pending(bus.as_ref(), &scope_list, payload).await) - } - }, - )); - - let bus_for_list_undelivered = bus.clone(); - let scope_list_undelivered = state_scope.clone(); - let list_undelivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_LIST_UNDELIVERED.into()).with_description( - "Return resolved approval records for a session that haven't yet been stitched \ - into an LLM turn. Lazy-flips expired pendings to timed_out." - .into(), - ), - move |payload: Value| { - let bus = bus_for_list_undelivered.clone(); - let scope = scope_list_undelivered.clone(); - async move { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - Ok::<_, IIIError>( - handle_list_undelivered(bus.as_ref(), &scope, payload, now_ms).await, - ) - } - }, - )); - - let bus_for_consume = bus.clone(); - let scope_consume = state_scope.clone(); - let consume_undelivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_CONSUME_UNDELIVERED.into()).with_description( - "Atomic list+ack of resolved approval records. Returns the same FIFO-capped \ - slice as list_undelivered AND stamps each entry with delivered_in_turn_id \ - before returning. Required payload: {session_id, turn_id, limit?}." 
- .into(), - ), - move |payload: Value| { - let bus = bus_for_consume.clone(); - let scope = scope_consume.clone(); - async move { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - Ok::<_, IIIError>( - handle_consume_undelivered(bus.as_ref(), &scope, payload, now_ms).await, - ) - } - }, - )); - - let bus_for_ack = bus.clone(); - let scope_ack = state_scope.clone(); - let ack_delivered = - iii.register_function(( - RegisterFunctionMessage::with_id(FN_ACK_DELIVERED.into()).with_description( - "Stamp delivered_in_turn_id on resolved approvals so they aren't replayed \ - in subsequent turns. Idempotent." - .into(), - ), - move |payload: Value| { - let bus = bus_for_ack.clone(); - let scope = scope_ack.clone(); - async move { - Ok::<_, IIIError>(handle_ack_delivered(bus.as_ref(), &scope, payload).await) - } - }, - )); - - let bus_for_flush = bus.clone(); - let scope_flush = state_scope.clone(); - let flush_delivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_FLUSH_DELIVERED.into()).with_description( - "Stamp every unacked terminal approval record in a session as \ - delivered. One-shot operator recovery for backlog accumulation. \ - Required payload: {session_id, turn_id}." - .into(), - ), - move |payload: Value| { - let bus = bus_for_flush.clone(); - let scope = scope_flush.clone(); - async move { - Ok::<_, IIIError>(handle_flush_delivered(bus.as_ref(), &scope, payload).await) - } - }, - )); - - let bus_for_sweep = bus.clone(); - let scope_sweep = state_scope.clone(); - let sweep_session = - iii.register_function(( - RegisterFunctionMessage::with_id(FN_SWEEP_SESSION.into()).with_description( - "Sweep all pending approvals for a session to timed_out. \ - Called when a session is deleted." 
- .into(), - ), - move |payload: Value| { - let bus = bus_for_sweep.clone(); - let scope = scope_sweep.clone(); - async move { - Ok::<_, IIIError>(handle_sweep_session(bus.as_ref(), &scope, payload).await) - } - }, - )); - - let bus_for_lookup = bus.clone(); - let scope_lookup = state_scope.clone(); - let lookup_record = - iii.register_function(( - RegisterFunctionMessage::with_id(FN_LOOKUP_RECORD.into()).with_description( - "Return the approval state-store record for a session/function_call_id pair; \ - null when absent. Used by shell bypass validation." - .into(), - ), - move |payload: Value| { - let bus = bus_for_lookup.clone(); - let scope = scope_lookup.clone(); - async move { - Ok::<_, IIIError>(handle_lookup_record(bus.as_ref(), &scope, payload).await) - } - }, - )); - - let iii_for_sub = iii.clone(); - let bus_for_sub = bus.clone(); - let subscriber_scope = state_scope.clone(); - let rules_for_sub = rules.clone(); - let policy_rules_for_sub = policy_rules.clone(); - let subscriber_fn = iii.register_function(( - RegisterFunctionMessage::with_id("policy::approval_gate".into()) - .with_description("Pause function calls listed in approval_required.".into()), - move |envelope: Value| { - let iii = iii_for_sub.clone(); - let bus = bus_for_sub.clone(); - let sc = subscriber_scope.clone(); - let intercept_rules = rules_for_sub.clone(); - let policy_rules = policy_rules_for_sub.clone(); - async move { - let Some(call) = extract_call(&envelope) else { - return Ok::<_, IIIError>(json!({ "block": false })); - }; - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - - // Layered policy rules run first. Allow / Deny short-circuit; - // Ask (and no-match) falls through to the existing per-function - // interceptor flow. Pattern is "*" in v1 — see `crate::rules`. 
- // Read-lock is acquired and dropped inside a block so the - // guard never crosses an `.await` (std::sync::RwLock is not - // async-safe to hold across suspension points). - let policy_outcome = { - let guard = policy_rules - .read() - .expect("approval-gate policy rules lock poisoned"); - apply_policy_rules(&guard, &call.function_id) - }; - match policy_outcome { - PolicyOutcome::Allow => { - return Ok::<_, IIIError>(json!({ "block": false })); - } - PolicyOutcome::Deny { - rule_permission, - rule_pattern, - } => { - let denial = Denial::Policy { - classifier_reason: format!( - "rule {rule_permission} {rule_pattern} denies" - ), - classifier_fn: "approval-gate::rules".to_string(), - }; - return Ok::<_, IIIError>(json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, - "function_id": call.function_id, - })); - } - PolicyOutcome::FallThrough => {} - } - - let action = decide_intercept_action( - rule_for(intercept_rules.as_slice(), &call.function_id), - call.requires_approval(), - ); - let reply = match action { - InterceptAction::Pass => json!({ "block": false }), - InterceptAction::Pause => { - handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, false).await - } - InterceptAction::Classify { - classifier_fn, - classifier_timeout_ms, - } => match iii - .trigger(TriggerRequest { - function_id: classifier_fn.clone(), - payload: call.args.clone(), - action: None, - timeout_ms: Some(classifier_timeout_ms), - }) - .await - { - Ok(v) => match interpret_classifier_reply(&v, &classifier_fn) { - Ok(ClassifierDecision::Auto) => json!({ "block": false }), - Ok(ClassifierDecision::Deny(denial)) => json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, - "function_id": call.function_id, - }), - Ok(ClassifierDecision::Ask) | Err(()) => { - handle_intercept( - bus.as_ref(), - &sc, - &call, - now_ms, - timeout_ms, - true, - ) - .await - } - }, - Err(_) => { - 
handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, true) - .await - } - }, - }; - - if reply.get("status").and_then(Value::as_str) == Some("pending") { - write_event( - &iii, - &call.session_id, - &json!({ - "type": "approval_requested", - "function_call_id": call.function_call_id, - "tool_call_id": call.function_call_id, - "function_id": call.function_id, - "tool_name": call.function_id, - "args": call.args, - "expires_at": now_ms.saturating_add(timeout_ms), - }), - ) - .await; - } - write_hook_reply(&iii, &call.reply_stream, &call.event_id, &reply).await; - Ok(reply) - } - }, - )); - - let subscriber_trigger = iii - .register_trigger(RegisterTriggerInput { - trigger_type: "durable:subscriber".into(), - function_id: "policy::approval_gate".into(), - config: json!({ "topic": topic }), - metadata: None, - }) - .map_err(|e| anyhow::anyhow!(e.to_string()))?; - - let sweeper = spawn_timeout_sweeper( - iii.clone(), - bus.clone(), - state_scope.clone(), - cfg.sweeper_interval_ms, - ); - - Ok(Refs { - resolve, - list_pending, - list_undelivered, - consume_undelivered, - ack_delivered, - flush_delivered, - sweep_session, - lookup_record, - subscriber_fn, - subscriber_trigger, - sweeper, - }) -} +#[cfg(test)] +use state::{merge_from_approval_marker_if_needed, rule_for}; +#[cfg(test)] +use sweeper::timeout_resolved_event; #[cfg(test)] mod tests { use super::*; - use serde_json::json; + use serde_json::{json, Value}; + use std::sync::{Arc, RwLock}; /// Empty policy ruleset for tests that exercise [`handle_resolve`] /// without cascading. Each call freshly constructs the lock so unit diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs new file mode 100644 index 00000000..a6d9617c --- /dev/null +++ b/approval-gate/src/register.rs @@ -0,0 +1,489 @@ +//! iii function/trigger wiring. +//! +//! [`register`] is the entry point the binary calls at startup. It +//! constructs the shared `StateBus` + `FunctionExecutor`, hooks every +//! 
`approval::*` function id, registers the `policy::approval_gate` +//! subscriber on the configured topic, spawns the timeout sweeper, and +//! returns a [`Refs`] handle whose contents keep all the function +//! registrations and the sweeper task alive for the worker's lifetime. +//! +//! The subscriber closure is the only piece of non-trivial logic in +//! this module — it composes the three decision layers documented in +//! [`crate::intercept`] and writes the resulting hook reply onto the +//! envelope's reply stream. + +use std::sync::{Arc, RwLock}; + +use iii_sdk::{ + FunctionRef, IIIError, RegisterFunctionMessage, RegisterTriggerInput, TriggerRequest, III, +}; +use serde_json::{json, Value}; + +use crate::config::{InterceptorRule, WorkerConfig}; +use crate::delivery::{ + handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, + handle_list_undelivered, handle_sweep_session, +}; +use crate::intercept::{ + apply_policy_rules, decide_intercept_action, handle_intercept, interpret_classifier_reply, + ClassifierDecision, InterceptAction, PolicyOutcome, +}; +use crate::resolve::{handle_lookup_record, handle_resolve}; +use crate::rules; +use crate::state::{ + rule_for, unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, + StateBus, +}; +use crate::sweeper::{spawn_timeout_sweeper, write_event, write_hook_reply}; +use crate::wire::{extract_call, pending_key, Denial}; + +/// The iii function ids registered by [`register`]. Operators must not +/// alias these on any classifier — the boot guard logs a warning when +/// a misconfiguration is detected, see [`register`]. 
+pub const FN_RESOLVE: &str = "approval::resolve"; +pub const FN_LIST_PENDING: &str = "approval::list_pending"; +pub const FN_LIST_UNDELIVERED: &str = "approval::list_undelivered"; +pub const FN_CONSUME_UNDELIVERED: &str = "approval::consume_undelivered"; +pub const FN_ACK_DELIVERED: &str = "approval::ack_delivered"; +pub const FN_FLUSH_DELIVERED: &str = "approval::flush_delivered"; +pub const FN_SWEEP_SESSION: &str = "approval::sweep_session"; +pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; + +/// Default `approval_state_scope` (matches [`WorkerConfig::default`]). +pub const STATE_SCOPE: &str = "approvals"; + +/// Handles returned from [`register`]; holding them keeps every iii +/// function registration and the background sweeper task alive. +pub struct Refs { + pub resolve: FunctionRef, + pub list_pending: FunctionRef, + pub list_undelivered: FunctionRef, + pub consume_undelivered: FunctionRef, + pub ack_delivered: FunctionRef, + pub flush_delivered: FunctionRef, + pub sweep_session: FunctionRef, + pub lookup_record: FunctionRef, + pub subscriber_fn: FunctionRef, + pub subscriber_trigger: iii_sdk::Trigger, + /// Background task that flips expired pending records to `timed_out` and + /// emits the corresponding `approval_resolved` events. Kept alive by + /// virtue of being held here; aborts when the worker shuts down. + pub sweeper: tokio::task::JoinHandle<()>, +} + +pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { + let rules: Arc> = Arc::new(cfg.interceptors.clone()); + // Layered policy rules consulted before the per-function interceptor + // flow. Wrapped in RwLock so a user reply with `always: true` on + // `approval::resolve` can push a new Allow rule at runtime (see the + // cascade in `handle_resolve`). See [`crate::rules`]. 
+ let policy_rules: Arc> = Arc::new(RwLock::new(cfg.rules.clone())); + + // Fail fast on honor-system markers: any interceptor that asks the gate + // to inject `__from_approval` MUST also assert the target validates it. + // Without that assertion the marker is purely decorative and the gate + // has no way to know whether bypass-through-direct-trigger is contained. + let unverified = unverified_marker_targets(rules.as_slice()); + if !unverified.is_empty() { + return Err(anyhow::anyhow!( + "approval-gate: refusing to start — interceptors with inject_approval_marker=true \ + must also set marker_target_verified=true (target is asserted to validate \ + __from_approval against approval::lookup_record). Unverified: {unverified:?}" + )); + } + + for rule in rules.iter() { + if let Some(cid) = rule.classifier.as_deref() { + if cid == FN_LOOKUP_RECORD + || cid == FN_RESOLVE + || cid == FN_LIST_PENDING + || cid == FN_LIST_UNDELIVERED + || cid == FN_ACK_DELIVERED + || cid == FN_SWEEP_SESSION + { + tracing::warn!( + "approval-gate: interceptor for {:?} uses classifier {:?} which aliases an approval endpoint; fix config", + rule.function_id, + cid + ); + } + } + } + + let bus: Arc = Arc::new(IiiStateBus(iii.clone())); + let timeout_ms = cfg.default_timeout_ms; + let topic = cfg.topic.clone(); + let state_scope = cfg.approval_state_scope.clone(); + + let bus_for_resolve = bus.clone(); + let scope_resolve = state_scope.clone(); + let exec_for_resolve: Arc = Arc::new(IiiFunctionExecutor { + iii: iii.clone(), + rules: rules.clone(), + }); + let iii_for_resolve = iii.clone(); + let policy_rules_for_resolve = policy_rules.clone(); + let resolve = iii.register_function(( + RegisterFunctionMessage::with_id(FN_RESOLVE.into()).with_description( + "Resolve a pending approval. On allow, invokes the underlying function; \ + on deny, records the denial. 
With `always: true` on an allow reply, \ + a runtime rule is added so future calls to this function id auto-allow, \ + and the session's other pending calls newly matching are cascade-resolved. \ + The result is stitched into the agent's next turn as a system message." + .into(), + ), + move |payload: Value| { + let bus = bus_for_resolve.clone(); + let exec = exec_for_resolve.clone(); + let scope_resolve = scope_resolve.clone(); + let iii = iii_for_resolve.clone(); + let policy_rules = policy_rules_for_resolve.clone(); + async move { + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let resp = handle_resolve( + bus.as_ref(), + exec.as_ref(), + &scope_resolve, + &policy_rules, + payload.clone(), + now_ms, + ) + .await; + + if resp.get("ok").and_then(Value::as_bool) == Some(true) { + let session_id = payload + .get("session_id") + .and_then(Value::as_str) + .unwrap_or(""); + let call_id = payload + .get("function_call_id") + .or_else(|| payload.get("tool_call_id")) + .and_then(Value::as_str) + .unwrap_or(""); + if !session_id.is_empty() && !call_id.is_empty() { + let key = pending_key(session_id, call_id); + if let Some(final_rec) = bus.get(&scope_resolve, &key).await { + let mut evt = json!({ + "type": "approval_resolved", + "function_call_id": call_id, + "tool_call_id": call_id, + }); + if let Some(status) = final_rec.get("status").and_then(Value::as_str) { + evt["decision"] = match status { + "executed" | "approved" => json!("allow"), + _ => json!("deny"), + }; + evt["status"] = json!(status); + } + if let Some(r) = final_rec.get("result") { + evt["result"] = json!(r); + } + if let Some(e) = final_rec.get("error") { + evt["error"] = json!(e); + } + if let Some(denial) = final_rec.get("denial") { + evt["denial"] = denial.clone(); + } + write_event(&iii, session_id, &evt).await; + } + } + } + Ok::<_, IIIError>(resp) + } + }, + )); + + let bus_for_list = bus.clone(); + let 
scope_list = state_scope.clone(); + let list_pending = iii.register_function(( + RegisterFunctionMessage::with_id(FN_LIST_PENDING.into()) + .with_description("Return pending approvals for a session.".into()), + move |payload: Value| { + let bus = bus_for_list.clone(); + let scope_list = scope_list.clone(); + async move { + Ok::<_, IIIError>(handle_list_pending(bus.as_ref(), &scope_list, payload).await) + } + }, + )); + + let bus_for_list_undelivered = bus.clone(); + let scope_list_undelivered = state_scope.clone(); + let list_undelivered = iii.register_function(( + RegisterFunctionMessage::with_id(FN_LIST_UNDELIVERED.into()).with_description( + "Return resolved approval records for a session that haven't yet been stitched \ + into an LLM turn. Lazy-flips expired pendings to timed_out." + .into(), + ), + move |payload: Value| { + let bus = bus_for_list_undelivered.clone(); + let scope = scope_list_undelivered.clone(); + async move { + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + Ok::<_, IIIError>( + handle_list_undelivered(bus.as_ref(), &scope, payload, now_ms).await, + ) + } + }, + )); + + let bus_for_consume = bus.clone(); + let scope_consume = state_scope.clone(); + let consume_undelivered = iii.register_function(( + RegisterFunctionMessage::with_id(FN_CONSUME_UNDELIVERED.into()).with_description( + "Atomic list+ack of resolved approval records. Returns the same FIFO-capped \ + slice as list_undelivered AND stamps each entry with delivered_in_turn_id \ + before returning. Required payload: {session_id, turn_id, limit?}." 
+ .into(), + ), + move |payload: Value| { + let bus = bus_for_consume.clone(); + let scope = scope_consume.clone(); + async move { + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + Ok::<_, IIIError>( + handle_consume_undelivered(bus.as_ref(), &scope, payload, now_ms).await, + ) + } + }, + )); + + let bus_for_ack = bus.clone(); + let scope_ack = state_scope.clone(); + let ack_delivered = iii.register_function(( + RegisterFunctionMessage::with_id(FN_ACK_DELIVERED.into()).with_description( + "Stamp delivered_in_turn_id on resolved approvals so they aren't replayed \ + in subsequent turns. Idempotent." + .into(), + ), + move |payload: Value| { + let bus = bus_for_ack.clone(); + let scope = scope_ack.clone(); + async move { + Ok::<_, IIIError>(handle_ack_delivered(bus.as_ref(), &scope, payload).await) + } + }, + )); + + let bus_for_flush = bus.clone(); + let scope_flush = state_scope.clone(); + let flush_delivered = iii.register_function(( + RegisterFunctionMessage::with_id(FN_FLUSH_DELIVERED.into()).with_description( + "Stamp every unacked terminal approval record in a session as \ + delivered. One-shot operator recovery for backlog accumulation. \ + Required payload: {session_id, turn_id}." + .into(), + ), + move |payload: Value| { + let bus = bus_for_flush.clone(); + let scope = scope_flush.clone(); + async move { + Ok::<_, IIIError>(handle_flush_delivered(bus.as_ref(), &scope, payload).await) + } + }, + )); + + let bus_for_sweep = bus.clone(); + let scope_sweep = state_scope.clone(); + let sweep_session = iii.register_function(( + RegisterFunctionMessage::with_id(FN_SWEEP_SESSION.into()).with_description( + "Sweep all pending approvals for a session to timed_out. \ + Called when a session is deleted." 
+ .into(), + ), + move |payload: Value| { + let bus = bus_for_sweep.clone(); + let scope = scope_sweep.clone(); + async move { + Ok::<_, IIIError>(handle_sweep_session(bus.as_ref(), &scope, payload).await) + } + }, + )); + + let bus_for_lookup = bus.clone(); + let scope_lookup = state_scope.clone(); + let lookup_record = iii.register_function(( + RegisterFunctionMessage::with_id(FN_LOOKUP_RECORD.into()).with_description( + "Return the approval state-store record for a session/function_call_id pair; \ + null when absent. Used by shell bypass validation." + .into(), + ), + move |payload: Value| { + let bus = bus_for_lookup.clone(); + let scope = scope_lookup.clone(); + async move { + Ok::<_, IIIError>(handle_lookup_record(bus.as_ref(), &scope, payload).await) + } + }, + )); + + let iii_for_sub = iii.clone(); + let bus_for_sub = bus.clone(); + let subscriber_scope = state_scope.clone(); + let rules_for_sub = rules.clone(); + let policy_rules_for_sub = policy_rules.clone(); + let subscriber_fn = iii.register_function(( + RegisterFunctionMessage::with_id("policy::approval_gate".into()) + .with_description("Pause function calls listed in approval_required.".into()), + move |envelope: Value| { + let iii = iii_for_sub.clone(); + let bus = bus_for_sub.clone(); + let sc = subscriber_scope.clone(); + let intercept_rules = rules_for_sub.clone(); + let policy_rules = policy_rules_for_sub.clone(); + async move { + let Some(call) = extract_call(&envelope) else { + return Ok::<_, IIIError>(json!({ "block": false })); + }; + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + + // Layered policy rules run first. Allow / Deny short-circuit; + // Ask (and no-match) falls through to the existing per-function + // interceptor flow. Pattern is "*" in v1 — see `crate::rules`. 
+ // Read-lock is acquired and dropped inside a block so the + // guard never crosses an `.await` (std::sync::RwLock is not + // async-safe to hold across suspension points). + let policy_outcome = { + let guard = policy_rules + .read() + .expect("approval-gate policy rules lock poisoned"); + apply_policy_rules(&guard, &call.function_id) + }; + match policy_outcome { + PolicyOutcome::Allow => { + return Ok::<_, IIIError>(json!({ "block": false })); + } + PolicyOutcome::Deny { + rule_permission, + rule_pattern, + } => { + let denial = Denial::Policy { + classifier_reason: format!( + "rule {rule_permission} {rule_pattern} denies" + ), + classifier_fn: "approval-gate::rules".to_string(), + }; + return Ok::<_, IIIError>(json!({ + "block": true, + "denial": denial, + "status": "denied", + "call_id": call.function_call_id, + "function_id": call.function_id, + })); + } + PolicyOutcome::FallThrough => {} + } + + let action = decide_intercept_action( + rule_for(intercept_rules.as_slice(), &call.function_id), + call.requires_approval(), + ); + let reply = match action { + InterceptAction::Pass => json!({ "block": false }), + InterceptAction::Pause => { + handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, false).await + } + InterceptAction::Classify { + classifier_fn, + classifier_timeout_ms, + } => match iii + .trigger(TriggerRequest { + function_id: classifier_fn.clone(), + payload: call.args.clone(), + action: None, + timeout_ms: Some(classifier_timeout_ms), + }) + .await + { + Ok(v) => match interpret_classifier_reply(&v, &classifier_fn) { + Ok(ClassifierDecision::Auto) => json!({ "block": false }), + Ok(ClassifierDecision::Deny(denial)) => json!({ + "block": true, + "denial": denial, + "status": "denied", + "call_id": call.function_call_id, + "function_id": call.function_id, + }), + Ok(ClassifierDecision::Ask) | Err(()) => { + handle_intercept( + bus.as_ref(), + &sc, + &call, + now_ms, + timeout_ms, + true, + ) + .await + } + }, + Err(_) => { + 
handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, true) + .await + } + }, + }; + + if reply.get("status").and_then(Value::as_str) == Some("pending") { + write_event( + &iii, + &call.session_id, + &json!({ + "type": "approval_requested", + "function_call_id": call.function_call_id, + "tool_call_id": call.function_call_id, + "function_id": call.function_id, + "tool_name": call.function_id, + "args": call.args, + "expires_at": now_ms.saturating_add(timeout_ms), + }), + ) + .await; + } + write_hook_reply(&iii, &call.reply_stream, &call.event_id, &reply).await; + Ok(reply) + } + }, + )); + + let subscriber_trigger = iii + .register_trigger(RegisterTriggerInput { + trigger_type: "durable:subscriber".into(), + function_id: "policy::approval_gate".into(), + config: json!({ "topic": topic }), + metadata: None, + }) + .map_err(|e| anyhow::anyhow!(e.to_string()))?; + + let sweeper = spawn_timeout_sweeper( + iii.clone(), + bus.clone(), + state_scope.clone(), + cfg.sweeper_interval_ms, + ); + + Ok(Refs { + resolve, + list_pending, + list_undelivered, + consume_undelivered, + ack_delivered, + flush_delivered, + sweep_session, + lookup_record, + subscriber_fn, + subscriber_trigger, + sweeper, + }) +} From 60e7d6ab6cb9c7afd75e9ee265c54aecd9e1765e Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 21:09:50 -0300 Subject: [PATCH 13/30] refactor(approval-gate): move pub(crate)-using tests inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of relocating the 2580-line inline test block from lib.rs into more focused homes. This commit moves the 18 tests that touch pub(crate) helpers — ones that can't live in tests/*.rs because integration tests can only see pub items — into the #[cfg(test)] mod tests block of whichever production module owns the helper they exercise. 
intercept.rs gained: - interpret_classifier_reply_reads_decision_tags - decide_intercept_action_* (5 tests) - apply_policy_rules_* (5 tests) state.rs gained: - merge_from_approval_* (4 tests) - rule_for_returns_matching_rule / rule_for_returns_none_when_absent sweeper.rs gained: - timeout_resolved_event_shape lib.rs loses those 18 tests. 141 approval-gate tests pass (no duplicate counts — each test runs once). turn-orchestrator + harness + shell verified green. Public-API tests (the remaining ~96 in lib.rs) move to tests/*.rs in a follow-up commit so they can share fakes via a common module. --- approval-gate/src/intercept.rs | 178 ++++++++++++++++++++++ approval-gate/src/lib.rs | 266 --------------------------------- approval-gate/src/state.rs | 78 ++++++++++ approval-gate/src/sweeper.rs | 18 +++ 4 files changed, 274 insertions(+), 266 deletions(-) diff --git a/approval-gate/src/intercept.rs b/approval-gate/src/intercept.rs index ef0e4663..7cd58628 100644 --- a/approval-gate/src/intercept.rs +++ b/approval-gate/src/intercept.rs @@ -235,3 +235,181 @@ pub async fn handle_intercept( "function_id": call.function_id, }) } + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn interpret_classifier_reply_reads_decision_tags() { + assert!(matches!( + interpret_classifier_reply(&json!({"decision": "auto"}), "shell::classify_argv"), + Ok(ClassifierDecision::Auto) + )); + match interpret_classifier_reply( + &json!({"decision":"deny","reason":"nope"}), + "shell::classify_argv", + ) { + Ok(ClassifierDecision::Deny(Denial::Policy { + classifier_reason, + classifier_fn, + })) => { + assert_eq!(classifier_reason, "nope"); + assert_eq!(classifier_fn, "shell::classify_argv"); + } + o => panic!("expected Policy denial {:?}", o), + } + assert!(matches!( + interpret_classifier_reply( + &json!({"decision":"ask","summary":"x"}), + "shell::classify_argv" + ), + Ok(ClassifierDecision::Ask) + )); + assert!(interpret_classifier_reply(&json!({}), 
"shell::classify_argv").is_err()); + } + + /// An operator-registered rule is authoritative: every call to that + /// function id runs through the classifier, even when the run's + /// `approval_required` list is empty. + #[test] + fn decide_intercept_action_classifies_when_rule_has_classifier_regardless_of_approval_required( + ) { + let rule = InterceptorRule { + function_id: "shell::exec".into(), + classifier: Some("shell::classify_argv".into()), + classifier_timeout_ms: 2000, + inject_approval_marker: true, + marker_target_verified: true, + }; + let action = decide_intercept_action(Some(&rule), false); + assert_eq!( + action, + InterceptAction::Classify { + classifier_fn: "shell::classify_argv".into(), + classifier_timeout_ms: 2000, + } + ); + assert_eq!(action, decide_intercept_action(Some(&rule), true)); + } + + #[test] + fn decide_intercept_action_pauses_when_rule_has_no_classifier_regardless_of_approval_required() + { + let rule = InterceptorRule { + function_id: "shell::fs::write".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }; + assert_eq!( + decide_intercept_action(Some(&rule), false), + InterceptAction::Pause + ); + assert_eq!( + decide_intercept_action(Some(&rule), true), + InterceptAction::Pause + ); + } + + #[test] + fn decide_intercept_action_pauses_when_no_rule_but_run_listed_approval_required() { + assert_eq!(decide_intercept_action(None, true), InterceptAction::Pause); + } + + #[test] + fn decide_intercept_action_passes_when_no_rule_and_not_approval_required() { + assert_eq!(decide_intercept_action(None, false), InterceptAction::Pass); + } + + #[test] + fn decide_intercept_action_classifier_empty_string_treated_as_no_classifier() { + let rule = InterceptorRule { + function_id: "shell::exec".into(), + classifier: Some(String::new()), + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }; + assert_eq!( + 
decide_intercept_action(Some(&rule), false), + InterceptAction::Pause + ); + } + + #[test] + fn apply_policy_rules_empty_ruleset_falls_through() { + let rs: rules::Ruleset = vec![]; + assert_eq!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::FallThrough + ); + } + + #[test] + fn apply_policy_rules_allow_short_circuits() { + let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Allow, + }]; + assert_eq!(apply_policy_rules(&rs, "shell::exec"), PolicyOutcome::Allow); + } + + #[test] + fn apply_policy_rules_deny_carries_matched_rule_identity() { + let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::*".into(), + pattern: "*".into(), + action: rules::Action::Deny, + }]; + assert_eq!( + apply_policy_rules(&rs, "shell::fs::write"), + PolicyOutcome::Deny { + rule_permission: "shell::*".into(), + rule_pattern: "*".into(), + } + ); + } + + #[test] + fn apply_policy_rules_ask_falls_through_to_interceptor_flow() { + // Ask means "no decision from this layer — let the next handle it". + let rs: rules::Ruleset = vec![rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Ask, + }]; + assert_eq!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::FallThrough + ); + } + + #[test] + fn apply_policy_rules_last_matching_wins() { + // Later-listed more-specific rule overrides earlier permissive default. + let rs: rules::Ruleset = vec![ + rules::Rule { + permission: "*".into(), + pattern: "*".into(), + action: rules::Action::Allow, + }, + rules::Rule { + permission: "shell::exec".into(), + pattern: "*".into(), + action: rules::Action::Deny, + }, + ]; + assert!(matches!( + apply_policy_rules(&rs, "shell::exec"), + PolicyOutcome::Deny { .. 
} + )); + assert_eq!( + apply_policy_rules(&rs, "approval::resolve"), + PolicyOutcome::Allow + ); + } +} diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 3630eb59..3bf09aa8 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -675,236 +675,6 @@ mod tests { assert_eq!(FN_LOOKUP_RECORD, "approval::lookup_record"); } - #[test] - fn interpret_classifier_reply_reads_decision_tags() { - assert!(matches!( - interpret_classifier_reply(&json!({"decision": "auto"}), "shell::classify_argv"), - Ok(ClassifierDecision::Auto) - )); - match interpret_classifier_reply( - &json!({"decision":"deny","reason":"nope"}), - "shell::classify_argv", - ) { - Ok(ClassifierDecision::Deny(Denial::Policy { - classifier_reason, - classifier_fn, - })) => { - assert_eq!(classifier_reason, "nope"); - assert_eq!(classifier_fn, "shell::classify_argv"); - } - o => panic!("expected Policy denial {:?}", o), - } - assert!(matches!( - interpret_classifier_reply( - &json!({"decision":"ask","summary":"x"}), - "shell::classify_argv" - ), - Ok(ClassifierDecision::Ask) - )); - assert!(interpret_classifier_reply(&json!({}), "shell::classify_argv").is_err()); - } - - #[test] - fn merge_from_approval_inserts_marker_when_inject_true() { - let m = merge_from_approval_marker_if_needed( - true, - json!({"command": "git"}), - "call-1", - "sess-1", - ); - let inner = m.get("__from_approval").unwrap(); - assert_eq!(inner["call_id"], "call-1"); - assert_eq!(inner["session_id"], "sess-1"); - assert_eq!(m["command"], "git"); - } - - #[test] - fn merge_from_approval_noop_when_inject_false() { - let j = json!({"a": 1}); - let out = merge_from_approval_marker_if_needed(false, j.clone(), "c", "s"); - assert_eq!(out, j); - } - - #[test] - fn rule_for_returns_matching_rule() { - let rules = vec![ - InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some("shell::classify_argv".into()), - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: 
true, - }, - InterceptorRule { - function_id: "other::fn".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - let r = rule_for(&rules, "shell::exec").expect("match"); - assert_eq!(r.classifier.as_deref(), Some("shell::classify_argv")); - assert!(r.inject_approval_marker); - } - - #[test] - fn rule_for_returns_none_when_absent() { - let rules = vec![InterceptorRule { - function_id: "x::y".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }]; - assert!(rule_for(&rules, "missing::id").is_none()); - } - - /// An operator-registered rule is authoritative: every call to that - /// function id runs through the classifier, even when the run's - /// `approval_required` list is empty. This is the inverted contract - /// vs. the original "approval_required ANDs the rule" gate. - #[test] - fn decide_intercept_action_classifies_when_rule_has_classifier_regardless_of_approval_required() { - let rule = InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some("shell::classify_argv".into()), - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }; - let action = decide_intercept_action(Some(&rule), false); - assert_eq!( - action, - InterceptAction::Classify { - classifier_fn: "shell::classify_argv".into(), - classifier_timeout_ms: 2000, - } - ); - assert_eq!(action, decide_intercept_action(Some(&rule), true)); - } - - #[test] - fn decide_intercept_action_pauses_when_rule_has_no_classifier_regardless_of_approval_required() { - let rule = InterceptorRule { - function_id: "shell::fs::write".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }; - assert_eq!( - decide_intercept_action(Some(&rule), false), - InterceptAction::Pause - ); - assert_eq!( - decide_intercept_action(Some(&rule), true), 
- InterceptAction::Pause - ); - } - - #[test] - fn decide_intercept_action_pauses_when_no_rule_but_run_listed_approval_required() { - assert_eq!(decide_intercept_action(None, true), InterceptAction::Pause); - } - - #[test] - fn decide_intercept_action_passes_when_no_rule_and_not_approval_required() { - assert_eq!(decide_intercept_action(None, false), InterceptAction::Pass); - } - - #[test] - fn apply_policy_rules_empty_ruleset_falls_through() { - let rs: rules::Ruleset = vec![]; - assert_eq!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::FallThrough - ); - } - - #[test] - fn apply_policy_rules_allow_short_circuits() { - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Allow, - }]; - assert_eq!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::Allow - ); - } - - #[test] - fn apply_policy_rules_deny_carries_matched_rule_identity() { - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::*".into(), - pattern: "*".into(), - action: rules::Action::Deny, - }]; - assert_eq!( - apply_policy_rules(&rs, "shell::fs::write"), - PolicyOutcome::Deny { - rule_permission: "shell::*".into(), - rule_pattern: "*".into(), - } - ); - } - - #[test] - fn apply_policy_rules_ask_falls_through_to_interceptor_flow() { - // Ask means "no decision from this layer — let the next handle it". - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Ask, - }]; - assert_eq!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::FallThrough - ); - } - - #[test] - fn apply_policy_rules_last_matching_wins() { - // Later-listed more-specific rule overrides earlier permissive default. 
- let rs: rules::Ruleset = vec![ - rules::Rule { - permission: "*".into(), - pattern: "*".into(), - action: rules::Action::Allow, - }, - rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Deny, - }, - ]; - assert!(matches!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::Deny { .. } - )); - assert_eq!( - apply_policy_rules(&rs, "approval::resolve"), - PolicyOutcome::Allow - ); - } - - #[test] - fn decide_intercept_action_classifier_empty_string_treated_as_no_classifier() { - let rule = InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some(String::new()), - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }; - assert_eq!( - decide_intercept_action(Some(&rule), false), - InterceptAction::Pause - ); - } - #[test] fn is_terminal_status_returns_true_for_terminal_states() { assert!(is_terminal_status("executed")); @@ -2059,19 +1829,6 @@ mod tests { ); } - #[test] - fn timeout_resolved_event_shape() { - let evt = timeout_resolved_event("tc-1"); - assert_eq!(evt["type"], "approval_resolved"); - assert_eq!(evt["function_call_id"], "tc-1"); - assert_eq!(evt["tool_call_id"], "tc-1"); - assert_eq!(evt["decision"], "deny"); - assert_eq!(evt["status"], "timed_out"); - // timed_out is self-describing — no Denial / no legacy reason. - assert!(evt.get("decision_reason").is_none()); - assert!(evt.get("denial").is_none()); - } - #[test] fn unverified_marker_targets_lists_unasserted_rules() { let rules = vec![ @@ -2126,29 +1883,6 @@ mod tests { // Each test corresponds to a mutant the test suite previously didn't // catch. Test name → mutated line in src/lib.rs. 
- #[test] - fn merge_from_approval_wraps_null_args_in_marker_only() { - // mutant L48: replace `other.is_null()` match guard - let out = merge_from_approval_marker_if_needed(true, Value::Null, "c1", "s1"); - assert!(out.get("__from_approval").is_some()); - assert!( - out.get("payload").is_none(), - "null-arg branch must NOT wrap as payload" - ); - } - - #[test] - fn merge_from_approval_wraps_scalar_args_in_payload() { - // mutant L48: same guard, the other branch - let out = merge_from_approval_marker_if_needed(true, json!("scalar"), "c1", "s1"); - assert!(out.get("__from_approval").is_some()); - assert_eq!( - out.get("payload"), - Some(&json!("scalar")), - "scalar-arg branch must wrap original under `payload`" - ); - } - #[tokio::test] async fn handle_intercept_replay_of_terminal_record_returns_already_resolved() { // mutant L331: replace `==` with `!=` in the replay defense — if diff --git a/approval-gate/src/state.rs b/approval-gate/src/state.rs index 26aa58a3..5eb960e3 100644 --- a/approval-gate/src/state.rs +++ b/approval-gate/src/state.rs @@ -177,3 +177,81 @@ pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { .map(|r| r.function_id.as_str()) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn merge_from_approval_inserts_marker_when_inject_true() { + let m = merge_from_approval_marker_if_needed( + true, + json!({"command": "git"}), + "call-1", + "sess-1", + ); + let inner = m.get("__from_approval").unwrap(); + assert_eq!(inner["call_id"], "call-1"); + assert_eq!(inner["session_id"], "sess-1"); + assert_eq!(m["command"], "git"); + } + + #[test] + fn merge_from_approval_noop_when_inject_false() { + let j = json!({"a": 1}); + let out = merge_from_approval_marker_if_needed(false, j.clone(), "c", "s"); + assert_eq!(out, j); + } + + #[test] + fn merge_from_approval_wraps_null_args_in_marker_only() { + let m = merge_from_approval_marker_if_needed(true, Value::Null, "c1", "s1"); + let obj = 
m.as_object().unwrap(); + assert_eq!(obj.len(), 1); + assert!(obj.contains_key("__from_approval")); + } + + #[test] + fn merge_from_approval_wraps_scalar_args_in_payload() { + let out = merge_from_approval_marker_if_needed(true, json!("scalar"), "c1", "s1"); + assert_eq!(out["payload"], json!("scalar")); + assert_eq!(out["__from_approval"]["call_id"], "c1"); + assert_eq!(out["__from_approval"]["session_id"], "s1"); + } + + #[test] + fn rule_for_returns_matching_rule() { + let rules = vec![ + InterceptorRule { + function_id: "shell::exec".into(), + classifier: Some("shell::classify_argv".into()), + classifier_timeout_ms: 2000, + inject_approval_marker: true, + marker_target_verified: true, + }, + InterceptorRule { + function_id: "other::fn".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }, + ]; + let r = rule_for(&rules, "shell::exec").expect("match"); + assert_eq!(r.classifier.as_deref(), Some("shell::classify_argv")); + assert!(r.inject_approval_marker); + } + + #[test] + fn rule_for_returns_none_when_absent() { + let rules = vec![InterceptorRule { + function_id: "x::y".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }]; + assert!(rule_for(&rules, "missing::id").is_none()); + } +} diff --git a/approval-gate/src/sweeper.rs b/approval-gate/src/sweeper.rs index 660ec219..3cf8012c 100644 --- a/approval-gate/src/sweeper.rs +++ b/approval-gate/src/sweeper.rs @@ -131,3 +131,21 @@ pub(crate) async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &st }) .await; } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn timeout_resolved_event_shape() { + let evt = timeout_resolved_event("tc-1"); + assert_eq!(evt["type"], "approval_resolved"); + assert_eq!(evt["function_call_id"], "tc-1"); + assert_eq!(evt["tool_call_id"], "tc-1"); + assert_eq!(evt["decision"], "deny"); + assert_eq!(evt["status"], 
"timed_out"); + // timed_out is self-describing — no Denial / no legacy reason. + assert!(evt.get("decision_reason").is_none()); + assert!(evt.get("denial").is_none()); + } +} From cf95b3e2c71ac5102231fcb2f7fbb7aa1d49c0e4 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Fri, 15 May 2026 21:15:18 -0300 Subject: [PATCH 14/30] refactor(approval-gate): move public-API tests to tests/*.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final test split. The 86 remaining inline tests in lib.rs that exercise only the crate's `pub` surface now live in tests/*.rs, organized by the area they cover. The state-machine proptest gets its own file. lib.rs drops from 2369 to 51 lines (just module declarations + re-exports). New tests/ layout: - common/mod.rs — shared fakes: FakeExecutor, InMemoryStateBus, FailingStateBus, sample_call(), empty_policy_rules() - lifecycle.rs — 19 tests: build_pending_record, transition_record, maybe_flip_timed_out, collect_timed_out_for_sweep, is_terminal_status, pending_key - wire.rs — 10 tests: extract_call, block_reply_for, requires_approval - intercept.rs — 9 tests: handle_intercept (replays, fail-closed, session_id stamping, force_pending) - resolve.rs — 19 tests: handle_resolve, cascade-on-`always`, handle_lookup_record - delivery.rs — 25 tests: list_pending / list_undelivered / ack_delivered / consume_undelivered / flush_delivered / sweep_session - misc.rs — 4 tests: FN_* constants, unverified_marker_targets, FakeExecutor smoke test - state_machine.rs — the proptest with the four lifecycle invariants Each tests/*.rs uses `mod common;` for shared fakes — only one source of truth for in-memory bus/executor stand-ins. 141 approval-gate tests pass across 13 test binaries (previously 6). turn-orchestrator + harness + shell verified green.
--- approval-gate/src/lib.rs | 2318 -------------------------- approval-gate/tests/common/mod.rs | 140 ++ approval-gate/tests/delivery.rs | 729 ++++++++ approval-gate/tests/intercept.rs | 218 +++ approval-gate/tests/lifecycle.rs | 271 +++ approval-gate/tests/misc.rs | 87 + approval-gate/tests/resolve.rs | 673 ++++++++ approval-gate/tests/state_machine.rs | 209 +++ approval-gate/tests/wire.rs | 156 ++ 9 files changed, 2483 insertions(+), 2318 deletions(-) create mode 100644 approval-gate/tests/common/mod.rs create mode 100644 approval-gate/tests/delivery.rs create mode 100644 approval-gate/tests/intercept.rs create mode 100644 approval-gate/tests/lifecycle.rs create mode 100644 approval-gate/tests/misc.rs create mode 100644 approval-gate/tests/resolve.rs create mode 100644 approval-gate/tests/state_machine.rs create mode 100644 approval-gate/tests/wire.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 3bf09aa8..e181db08 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -49,2321 +49,3 @@ use intercept::{ use state::{merge_from_approval_marker_if_needed, rule_for}; #[cfg(test)] use sweeper::timeout_resolved_event; - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::{json, Value}; - use std::sync::{Arc, RwLock}; - - /// Empty policy ruleset for tests that exercise [`handle_resolve`] - /// without cascading. Each call freshly constructs the lock so unit - /// tests stay independent — there's no shared mutable state. - fn empty_policy_rules() -> std::sync::RwLock { - std::sync::RwLock::new(crate::rules::Ruleset::new()) - } - - #[test] - fn maybe_flip_timed_out_returns_some_when_pending_and_expired() { - let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let flipped = maybe_flip_timed_out(&rec, 70_000).expect("should flip"); - assert_eq!(flipped["status"], "timed_out"); - // Timeout carries no Denial — the status alone explains the outcome. 
- assert!(flipped.get("denial").is_none()); - assert!(flipped.get("decision_reason").is_none()); - } - - #[test] - fn maybe_flip_timed_out_returns_none_when_pending_and_not_expired() { - let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - assert!(maybe_flip_timed_out(&rec, 60_000).is_none()); - assert!(maybe_flip_timed_out(&rec, 1_500).is_none()); - } - - #[test] - fn maybe_flip_timed_out_returns_none_when_not_pending() { - let rec = json!({ - "function_call_id": "tc-1", - "status": "executed", - "expires_at": 1_000_u64, - }); - assert!(maybe_flip_timed_out(&rec, 999_999_999).is_none()); - } - - #[test] - fn transition_record_stamps_resolved_at_for_terminal_status() { - let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record_with_now( - &base, - "executed", - Some(json!({"ok": true})), - None, - None, - 12_345, - ); - assert_eq!(rec["resolved_at"].as_u64(), Some(12_345)); - } - - #[test] - fn transition_record_preserves_existing_resolved_at_on_relift() { - let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let first = transition_record_with_now( - &base, - "executed", - Some(json!({"ok": true})), - None, - None, - 12_345, - ); - let second = transition_record_with_now( - &first, - "executed", - Some(json!({"ok": true})), - None, - None, - 99_999, - ); - assert_eq!(second["resolved_at"].as_u64(), Some(12_345)); - } - - #[test] - fn transition_record_does_not_stamp_resolved_at_for_intermediate_status() { - let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = - transition_record_with_now(&base, "approved", None, None, None, 12_345); - assert!(rec.get("resolved_at").is_none()); - } - - #[tokio::test] - async fn handle_list_undelivered_caps_at_default_limit_and_reports_omitted() { - let bus = InMemoryStateBus::new(); - for i in 0..75 { - let cid = format!("c{i}"); - let mut rec = 
transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 50); - assert_eq!(resp["omitted"].as_u64(), Some(25)); - } - - #[tokio::test] - async fn handle_list_undelivered_honors_explicit_limit() { - let bus = InMemoryStateBus::new(); - for i in 0..10 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_list_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "limit": 3}), - 100_000, - ) - .await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 3); - assert_eq!(resp["omitted"].as_u64(), Some(7)); - } - - #[tokio::test] - async fn handle_list_undelivered_returns_oldest_first_by_resolved_at() { - let bus = InMemoryStateBus::new(); - for (i, ts) in [(0_u32, 5_000_u64), (1, 1_000), (2, 3_000)] { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ts, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_list_undelivered( - 
&bus, - STATE_SCOPE, - json!({"session_id": "s1", "limit": 10}), - 100_000, - ) - .await; - let entries = resp["entries"].as_array().unwrap(); - let ids: Vec<&str> = entries - .iter() - .map(|e| e["function_call_id"].as_str().unwrap()) - .collect(); - assert_eq!(ids, vec!["c1", "c2", "c0"]); - } - - #[tokio::test] - async fn handle_list_undelivered_omitted_is_zero_when_under_limit() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record_with_now( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_500, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 1); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - } - - #[tokio::test] - async fn handle_consume_undelivered_stamps_returned_entries() { - let bus = InMemoryStateBus::new(); - for i in 0..3 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_consume_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "turn-7", "limit": 10}), - 100_000, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["entries"].as_array().unwrap().len(), 3); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - 
assert_eq!(next["entries"].as_array().unwrap().len(), 0); - } - - #[tokio::test] - async fn handle_consume_undelivered_respects_limit_and_leaves_remainder() { - let bus = InMemoryStateBus::new(); - for i in 0..5 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_consume_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "turn-7", "limit": 2}), - 100_000, - ) - .await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 2); - assert_eq!(resp["omitted"].as_u64(), Some(3)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(next["entries"].as_array().unwrap().len(), 3); - } - - #[tokio::test] - async fn handle_consume_undelivered_missing_turn_id_returns_error() { - let bus = InMemoryStateBus::new(); - let resp = handle_consume_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1"}), - 100_000, - ) - .await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], json!("missing_turn_id")); - } - - #[tokio::test] - async fn handle_flush_delivered_stamps_all_unacked_terminals() { - let bus = InMemoryStateBus::new(); - for i in 0..5 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_flush_delivered( - &bus, - 
STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"].as_u64(), Some(5)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(next["entries"].as_array().unwrap().len(), 0); - } - - #[tokio::test] - async fn handle_flush_delivered_skips_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - let resp = handle_flush_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["stamped"].as_u64(), Some(0)); - let still = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(still["status"].as_str(), Some("pending")); - assert!(still.get("delivered_in_turn_id").is_none()); - } - - #[tokio::test] - async fn handle_flush_delivered_idempotent_on_already_stamped() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record_with_now( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_500, - ); - { - let obj = rec.as_object_mut().unwrap(); - obj.insert( - "delivered_in_turn_id".into(), - Value::String("turn-prev".into()), - ); - obj.insert("session_id".into(), Value::String("s1".into())); - } - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - let resp = handle_flush_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["stamped"].as_u64(), Some(0)); - let still = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(still["delivered_in_turn_id"].as_str(), Some("turn-prev")); - } - - #[tokio::test] - async fn 
handle_list_undelivered_returns_terminal_records_with_no_delivered_stamp() { - let bus = InMemoryStateBus::new(); - let mut r1 = transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - r1.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), r1) - .await - .unwrap(); - let mut r2 = transition_record( - &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), - "denied", - None, - None, - Some(Denial::UserCorrected { - feedback: "nope".into(), - }), - ); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - let entries = resp["entries"].as_array().unwrap(); - assert_eq!(entries.len(), 2); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - } - - #[tokio::test] - async fn handle_list_undelivered_excludes_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 0); - } - - #[tokio::test] - async fn handle_list_undelivered_empty_session_returns_empty() { - let bus = InMemoryStateBus::new(); - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; - assert_eq!(resp["entries"], json!([])); - } - - #[tokio::test] - async fn handle_list_undelivered_excludes_records_stamped_with_delivered_turn_id() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record( - &build_pending_record("c1", 
"shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - { - let obj = rec.as_object_mut().unwrap(); - obj.insert( - "delivered_in_turn_id".into(), - Value::String("turn-prev".into()), - ); - obj.insert("session_id".into(), Value::String("s1".into())); - } - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - - let mut r2 = transition_record( - &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - let entries = resp["entries"].as_array().unwrap(); - assert_eq!(entries.len(), 1); - assert_eq!(entries[0]["function_call_id"], "c2"); - } - - #[tokio::test] - async fn handle_list_undelivered_returns_empty_when_session_id_missing() { - let bus = InMemoryStateBus::new(); - let resp = handle_list_undelivered(&bus, STATE_SCOPE, json!({}), 1_500).await; - assert_eq!(resp["entries"], json!([])); - } - - #[tokio::test] - async fn handle_ack_delivered_stamps_records_with_turn_id() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", - "call_ids": ["c1"], - "turn_id": "turn-1", - }), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"], json!(1)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["delivered_in_turn_id"], "turn-1"); - } - - 
#[tokio::test] - async fn handle_ack_delivered_is_idempotent_keeps_first_turn_id() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let _ = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-first", - }), - ) - .await; - let resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-second", - }), - ) - .await; - assert_eq!(resp["stamped"], json!(0), "second ack must not re-stamp"); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["delivered_in_turn_id"], "turn-first"); - } - - #[tokio::test] - async fn handle_ack_delivered_skips_unknown_call_ids_silently() { - let bus = InMemoryStateBus::new(); - let resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["ghost"], "turn_id": "turn-1", - }), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"], json!(0)); - } - - #[tokio::test] - async fn handle_resolve_on_expired_pending_flips_to_timed_out_and_ignores_decision() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), - 70_000, - ) - .await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], "timed_out"); - - assert!(exec.calls.lock().unwrap().is_empty()); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - 
.unwrap(); - assert_eq!(rec["status"], "timed_out"); - } - - #[test] - fn fn_constants_match_spec_strings() { - assert_eq!(FN_RESOLVE, "approval::resolve"); - assert_eq!(FN_LIST_PENDING, "approval::list_pending"); - assert_eq!(FN_LIST_UNDELIVERED, "approval::list_undelivered"); - assert_eq!(FN_ACK_DELIVERED, "approval::ack_delivered"); - assert_eq!(FN_LOOKUP_RECORD, "approval::lookup_record"); - } - - #[test] - fn is_terminal_status_returns_true_for_terminal_states() { - assert!(is_terminal_status("executed")); - assert!(is_terminal_status("failed")); - assert!(is_terminal_status("denied")); - assert!(is_terminal_status("timed_out")); - } - - #[test] - fn is_terminal_status_returns_false_for_in_progress_states() { - assert!(!is_terminal_status("pending")); - assert!(!is_terminal_status("approved")); - assert!(!is_terminal_status("anything_else")); - assert!(!is_terminal_status("")); - } - - #[test] - fn pending_key_includes_session_and_tool_call_id() { - assert_eq!(pending_key("s1", "tc-1"), "s1/tc-1"); - } - - #[test] - fn extract_call_reads_session_id_and_function_call_from_envelope() { - let envelope = json!({ - "event_id": "evt-1", - "reply_stream": "rs-1", - "payload": { - "function_call": { "id": "tc-1", "function_id": "write", "arguments": {"path": "/tmp/x"} }, - "approval_required": ["write"], - "session_id": "s1", - } - }); - let call = extract_call(&envelope).expect("decoded"); - assert_eq!(call.session_id, "s1"); - assert_eq!(call.function_call_id, "tc-1"); - assert_eq!(call.function_id, "write"); - assert_eq!(call.event_id, "evt-1"); - assert_eq!(call.reply_stream, "rs-1"); - assert!(call.approval_required.iter().any(|s| s == "write")); - } - - #[test] - fn extract_call_accepts_legacy_tool_call_envelope_with_name() { - let envelope = json!({ - "event_id": "evt-1", - "reply_stream": "rs-1", - "payload": { - "tool_call": { "id": "tc-1", "name": "write", "arguments": {} }, - "approval_required": ["write"], - "session_id": "s1", - } - }); - let call = 
extract_call(&envelope).expect("decoded"); - assert_eq!(call.function_call_id, "tc-1"); - assert_eq!(call.function_id, "write"); - } - - #[test] - fn requires_approval_only_for_listed_functions() { - let call = IncomingCall { - session_id: "s1".into(), - function_call_id: "tc-1".into(), - function_id: "ls".into(), - args: json!({}), - approval_required: vec!["write".into()], - event_id: "e".into(), - reply_stream: "r".into(), - }; - assert!(!call.requires_approval()); - - let call2 = IncomingCall { - function_id: "write".into(), - ..call - }; - assert!(call2.requires_approval()); - } - - #[test] - fn build_pending_record_sets_status_and_expiry() { - let now = 1_000_000; - let rec = build_pending_record("tc-1", "write", &json!({"x": 1}), now, 60_000); - assert_eq!(rec["status"], "pending"); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["expires_at"], 1_060_000); - } - - #[test] - fn block_reply_for_decision_allow_does_not_block() { - let reply = block_reply_for(&Decision::Allow); - assert_eq!(reply["block"], false); - } - - #[test] - fn block_reply_for_deny_emits_structured_denial() { - let reply = block_reply_for(&Decision::Deny(Denial::UserRejected)); - assert_eq!(reply["block"], true); - assert_eq!(reply["denial"]["kind"], "user_rejected"); - assert!(reply.as_object().unwrap().get("reason").is_none()); - } - - #[test] - fn block_reply_for_policy_deny_carries_classifier_detail() { - let reply = block_reply_for(&Decision::Deny(Denial::Policy { - classifier_reason: "command matches denylist".into(), - classifier_fn: "shell::classify_argv".into(), - })); - assert_eq!(reply["block"], true); - assert_eq!(reply["denial"]["kind"], "policy"); - assert_eq!( - reply["denial"]["detail"]["classifier_reason"], - "command matches denylist" - ); - assert_eq!( - reply["denial"]["detail"]["classifier_fn"], - "shell::classify_argv" - ); - } - - #[test] - fn block_reply_for_user_corrected_carries_feedback() { - let reply = 
block_reply_for(&Decision::Deny(Denial::UserCorrected { - feedback: "use git diff instead".into(), - })); - assert_eq!(reply["denial"]["kind"], "user_corrected"); - assert_eq!( - reply["denial"]["detail"]["feedback"], - "use git diff instead" - ); - } - - #[test] - fn extract_call_returns_none_when_function_call_absent() { - let envelope = json!({ - "event_id": "evt-1", - "reply_stream": "rs-1", - "payload": { "session_id": "s1", "approval_required": ["write"] } - }); - assert!(extract_call(&envelope).is_none()); - } - - #[test] - fn extract_call_returns_none_when_session_id_absent() { - let envelope = json!({ - "event_id": "evt-1", - "reply_stream": "rs-1", - "payload": { - "tool_call": { "id": "tc-1", "name": "write", "arguments": {} } - } - }); - assert!(extract_call(&envelope).is_none()); - } - - #[test] - fn block_reply_for_allow_omits_denial_and_reason() { - let reply = block_reply_for(&Decision::Allow); - assert_eq!(reply["block"], false); - assert!( - reply.get("reason").is_none(), - "Allow must not include reason: {reply}" - ); - assert!( - reply.get("denial").is_none(), - "Allow must not include denial: {reply}" - ); - } - - use std::sync::Mutex; - - fn sample_call() -> IncomingCall { - IncomingCall { - session_id: "s1".into(), - function_call_id: "tc-1".into(), - function_id: "shell::fs::write".into(), - args: json!({"path": "/tmp/a"}), - approval_required: vec!["shell::fs::write".into()], - event_id: "evt-1".into(), - reply_stream: "rs-1".into(), - } - } - - #[tokio::test] - async fn handle_intercept_returns_pending_envelope_when_call_is_gated() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("pending")); - assert_eq!(reply["call_id"], json!("tc-1")); - assert_eq!(reply["function_id"], json!("shell::fs::write")); - // Pending status is self-describing — no `reason` or 
`denial` field - // is emitted while the call is in-flight. - assert!(reply.get("reason").is_none()); - assert!(reply.get("denial").is_none()); - } - - #[tokio::test] - async fn handle_intercept_writes_pending_record_to_state() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let key = pending_key(&call.session_id, &call.function_call_id); - let rec = bus - .get(STATE_SCOPE, &key) - .await - .expect("pending record written"); - assert_eq!(rec["status"], "pending"); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["expires_at"], 61_000); - } - - #[tokio::test] - async fn handle_intercept_passes_through_when_call_is_not_gated() { - let bus = InMemoryStateBus::new(); - let mut call = sample_call(); - call.approval_required = vec!["other".into()]; - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(false)); - let key = pending_key(&call.session_id, &call.function_call_id); - assert!( - bus.get(STATE_SCOPE, &key).await.is_none(), - "no record written" - ); - } - - #[tokio::test] - async fn handle_intercept_force_pending_writes_when_not_on_required_list() { - let bus = InMemoryStateBus::new(); - let mut call = sample_call(); - call.approval_required = vec!["other".into()]; - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, true).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("pending")); - let key = pending_key(&call.session_id, &call.function_call_id); - assert!(bus.get(STATE_SCOPE, &key).await.is_some()); - } - - #[tokio::test] - async fn handle_lookup_record_returns_null_when_missing() { - let bus = InMemoryStateBus::new(); - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "function_call_id": "c1"}), - ) - .await; - assert!(v.is_null()); - } - - #[tokio::test] - async fn 
handle_lookup_record_returns_record_when_present() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "function_call_id": "tc-1"}), - ) - .await; - assert_eq!(v["status"], json!("pending")); - assert_eq!(v["function_id"], json!("shell::fs::write")); - } - - #[derive(Default)] - struct FakeExecutor { - calls: Mutex>, - response: Mutex>>, - } - - #[async_trait::async_trait] - impl FunctionExecutor for FakeExecutor { - async fn invoke( - &self, - function_id: &str, - args: Value, - function_call_id: &str, - session_id: &str, - ) -> Result { - self.calls.lock().unwrap().push(( - function_id.to_string(), - args, - function_call_id.to_string(), - session_id.to_string(), - )); - self.response - .lock() - .unwrap() - .clone() - .unwrap_or_else(|| Ok(json!({"ok": true}))) - } - } - - #[tokio::test] - async fn handle_resolve_allow_invokes_function_and_records_executed() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record( - "tc-1", - "shell::fs::write", - &json!({"path":"/a"}), - 1_000, - 60_000, - ), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - let calls = exec.calls.lock().unwrap().clone(); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].0, "shell::fs::write"); - assert_eq!(calls[0].1, json!({"path":"/a"})); - assert_eq!(calls[0].2, "tc-1"); - assert_eq!(calls[0].3, "s1"); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "executed"); - assert_eq!(rec["result"], json!({"ok": true})); - } - - 
#[tokio::test] - async fn allow_without_always_does_not_cascade() { - // Two pending shell::exec calls in the same session. Resolving - // the first with allow (always=false) must NOT touch the second. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for cid in ["tc-1", "tc-2"] { - let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert!( - resp.get("cascaded").is_none(), - "cascaded field must be omitted when always was not set: {resp}" - ); - let other = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-2")) - .await - .unwrap(); - assert_eq!(other["status"], "pending"); - assert_eq!(rules.read().unwrap().len(), 0, "rule must not be pushed"); - } - - #[tokio::test] - async fn allow_with_always_pushes_rule_and_cascades_same_session_pending() { - // Three pending calls in session s1: two shell::exec, one - // shell::fs::write. Resolving the first shell::exec with - // always=true must: - // 1. Push an Allow rule for shell::exec - // 2. Auto-resolve the other shell::exec pending in this session - // 3. 
Leave the shell::fs::write pending untouched - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for (cid, fn_id) in [ - ("tc-1", "shell::exec"), - ("tc-2", "shell::exec"), - ("tc-3", "shell::fs::write"), - ] { - let mut rec = build_pending_record(cid, fn_id, &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert_eq!( - resp["cascaded"], json!(1), - "tc-2 should cascade; tc-1 originator excluded; tc-3 not matched" - ); - - // The Allow rule for shell::exec is now in the shared ruleset. - let pushed = rules.read().unwrap(); - assert_eq!(pushed.len(), 1); - assert_eq!(pushed[0].permission, "shell::exec"); - assert_eq!(pushed[0].action, rules::Action::Allow); - drop(pushed); - - // Originator and cascaded record both transitioned to executed. - let r1 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - let r2 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-2")) - .await - .unwrap(); - let r3 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-3")) - .await - .unwrap(); - assert_eq!(r1["status"], "executed"); - assert_eq!(r2["status"], "executed"); - assert_eq!( - r3["status"], "pending", - "non-matching function_id must stay pending: {r3}" - ); - - // Executor was invoked twice: originator + cascaded. - assert_eq!(exec.calls.lock().unwrap().len(), 2); - } - - #[tokio::test] - async fn cascade_does_not_cross_session_boundary() { - // tc-1 in session s1, tc-2 in session s2 — both shell::exec. - // Resolving s1/tc-1 with always must not touch s2/tc-2. 
- let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for (session, cid) in [("s1", "tc-1"), ("s2", "tc-2")] { - let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!(session)); - bus.set(STATE_SCOPE, &pending_key(session, cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert!( - resp.get("cascaded").is_none() || resp["cascaded"] == json!(0), - "no record in s1 to cascade onto; tc-2 in s2 must NOT be touched: {resp}" - ); - - let other_session = bus - .get(STATE_SCOPE, &pending_key("s2", "tc-2")) - .await - .unwrap(); - assert_eq!(other_session["status"], "pending"); - assert_eq!( - exec.calls.lock().unwrap().len(), - 1, - "only the originator should have been invoked" - ); - } - - #[tokio::test] - async fn cascade_skips_originator_record() { - // Single pending record. always=true must not double-resolve it. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let mut rec = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) - .await - .unwrap(); - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - // Originator counts under the existing allow path, not the cascade. 
- assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); - assert_eq!(exec.calls.lock().unwrap().len(), 1); - } - - #[tokio::test] - async fn cascade_skips_already_resolved_records_in_session() { - // Two records in s1: tc-1 pending, tc-2 already terminal. The - // cascade must skip tc-2. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let mut r1 = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); - r1.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), r1) - .await - .unwrap(); - let mut r2 = build_pending_record("tc-2", "shell::exec", &json!({}), 1_000, 60_000); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - let r2_done = transition_record(&r2, "executed", Some(json!({"ok": true})), None, None); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), r2_done) - .await - .unwrap(); - - let rules = empty_policy_rules(); - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - // tc-2 is terminal — not pending — so cascade skips it. 
- assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); - } - - #[tokio::test] - async fn handle_resolve_deny_does_not_invoke_function() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - "denial": { - "kind": "user_corrected", - "detail": { "feedback": "not authorized" } - }, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - assert!(exec.calls.lock().unwrap().is_empty()); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "denied"); - assert_eq!(rec["denial"]["kind"], "user_corrected"); - assert_eq!(rec["denial"]["detail"]["feedback"], "not authorized"); - } - - #[tokio::test] - async fn handle_resolve_allow_records_failed_when_function_errors() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - *exec.response.lock().unwrap() = Some(Err("EACCES".into())); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "failed"); - assert_eq!(rec["error"], "EACCES"); - } - - #[tokio::test] - async fn fake_executor_records_calls() { - let exec = FakeExecutor::default(); - let out = exec - .invoke("shell::fs::write", json!({"x": 1}), "cid", "sid") - 
.await - .unwrap(); - assert_eq!(out, json!({"ok": true})); - let calls = exec.calls.lock().unwrap().clone(); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].0, "shell::fs::write"); - assert_eq!(calls[0].2, "cid"); - assert_eq!(calls[0].3, "sid"); - } - - struct InMemoryStateBus { - store: Mutex>, - } - - impl InMemoryStateBus { - fn new() -> Self { - Self { - store: Mutex::new(std::collections::HashMap::new()), - } - } - } - - #[async_trait::async_trait] - impl StateBus for InMemoryStateBus { - async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError> { - self.store - .lock() - .unwrap() - .insert(format!("{scope}/{key}"), value); - Ok(()) - } - async fn get(&self, scope: &str, key: &str) -> Option { - self.store - .lock() - .unwrap() - .get(&format!("{scope}/{key}")) - .cloned() - } - async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { - let map = self.store.lock().unwrap(); - map.iter() - .filter(|(k, _)| k.starts_with(&format!("{scope}/{prefix}"))) - .map(|(_, v)| v.clone()) - .collect() - } - } - - #[tokio::test] - async fn resolve_flips_status_when_pending() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "function_call_id": "tc-1", - "session_id": "s1", - "decision": "allow", - }), - 1_500, - ) - .await; - - assert_eq!(out["ok"], true); - let stored = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(stored["status"], "executed"); - } - - #[tokio::test] - async fn resolve_accepts_legacy_tool_call_id_field() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let exec = 
FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "tool_call_id": "tc-1", - "session_id": "s1", - "decision": "allow", - }), - 1_500, - ) - .await; - - assert_eq!(out["ok"], true); - } - - #[tokio::test] - async fn resolve_rejects_already_resolved_entry() { - let bus = InMemoryStateBus::new(); - let mut rec = build_pending_record("tc-1", "write", &json!({}), 0, 60_000); - rec["status"] = json!("allow"); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) - .await - .unwrap(); - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"function_call_id": "tc-1", "session_id": "s1", "decision": "deny"}), - 1_500, - ) - .await; - assert_eq!(out["ok"], false); - assert_eq!(out["error"], "already_resolved"); - } - - #[tokio::test] - async fn list_pending_returns_only_pending_for_session() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - let mut resolved = build_pending_record("tc-2", "write", &json!({}), 0, 60_000); - resolved["status"] = json!("allow"); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), resolved) - .await - .unwrap(); - bus.set( - STATE_SCOPE, - &pending_key("other", "tc-3"), - build_pending_record("tc-3", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let out = handle_list_pending(&bus, STATE_SCOPE, json!({ "session_id": "s1" })).await; - let items = out["pending"].as_array().unwrap(); - assert_eq!(items.len(), 1); - assert_eq!(items[0]["function_call_id"], "tc-1"); - } - - #[tokio::test] - async fn resolve_deny_without_denial_defaults_to_user_rejected() { - let bus = InMemoryStateBus::new(); - let _ = bus - .set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await; - - let exec = 
FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - }), - 1_500, - ) - .await; - assert_eq!(out["ok"], true); - - let stored = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(stored["status"], "denied"); - assert_eq!(stored["denial"]["kind"], "user_rejected"); - } - - #[tokio::test] - async fn resolve_deny_rejects_malformed_denial() { - let bus = InMemoryStateBus::new(); - let _ = bus - .set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await; - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - "denial": { "kind": "not_a_real_kind" }, - }), - 1_500, - ) - .await; - assert_eq!(out["ok"], false); - assert_eq!(out["error"], "bad_denial"); - } - - #[test] - fn transition_record_to_executed_attaches_result() { - let base = build_pending_record( - "tc-1", - "shell::fs::write", - &json!({"path":"/a"}), - 1_000, - 60_000, - ); - let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); - assert_eq!(rec["status"], "executed"); - assert_eq!(rec["result"], json!({"ok": true})); - assert!(rec.get("error").is_none() || rec["error"].is_null()); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["function_id"], "shell::fs::write"); - } - - #[test] - fn transition_record_to_failed_attaches_error() { - let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record(&base, "failed", None, Some("EACCES".into()), None); - assert_eq!(rec["status"], "failed"); - assert_eq!(rec["error"], "EACCES"); - assert!(rec.get("result").is_none() || rec["result"].is_null()); - } - - #[test] - fn 
transition_record_to_denied_attaches_structured_denial() { - let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record( - &base, - "denied", - None, - None, - Some(Denial::Policy { - classifier_reason: "not authorized".into(), - classifier_fn: "shell::classify_argv".into(), - }), - ); - assert_eq!(rec["status"], "denied"); - assert_eq!(rec["denial"]["kind"], "policy"); - assert_eq!(rec["denial"]["detail"]["classifier_reason"], "not authorized"); - assert!( - rec.get("decision_reason").is_none(), - "legacy decision_reason must not be written: {rec}" - ); - } - - #[test] - fn transition_record_to_timed_out_carries_no_denial() { - // Timeout status is self-describing — no Denial attached. - let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record(&base, "timed_out", None, None, None); - assert_eq!(rec["status"], "timed_out"); - assert!(rec.get("denial").is_none()); - assert!(rec.get("decision_reason").is_none()); - } - - #[test] - fn transition_record_preserves_delivered_in_turn_id_when_set() { - let mut base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - base.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String("turn-X".into()), - ); - let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); - assert_eq!(rec["delivered_in_turn_id"], "turn-X"); - } - - #[tokio::test] - async fn handle_sweep_session_flips_pending_records_to_timed_out() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; - assert_eq!(resp["swept"], json!(1)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - 
assert_eq!(rec["status"], "timed_out"); - // sweep_session no longer stamps a reason string — timed_out is - // self-describing per the Denial refactor. - assert!(rec.get("denial").is_none()); - assert!(rec.get("decision_reason").is_none()); - } - - #[tokio::test] - async fn handle_sweep_session_ignores_legacy_reason_payload_field() { - // Old callers may still pass `reason` — approval-gate accepts the - // payload but does not persist it. Behavior is identical to a - // bare {session_id} payload. - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - let resp = handle_sweep_session( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "reason": "run_stopped"}), - ) - .await; - assert_eq!(resp["swept"], json!(1)); - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["status"], "timed_out"); - assert!(rec.get("denial").is_none()); - } - - #[tokio::test] - async fn handle_sweep_session_skips_non_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; - assert_eq!(resp["swept"], json!(0)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["status"], "executed"); - } - - #[tokio::test] - async fn handle_sweep_session_returns_error_when_session_id_missing() { - let bus = InMemoryStateBus::new(); - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({})).await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], "missing_session_id"); - assert_eq!(resp["swept"], json!(0)); - } - - // 
── New reliability fixes ───────────────────────────────────────────── - - /// A bus that always refuses writes, to exercise fail-closed semantics. - struct FailingStateBus; - - #[async_trait::async_trait] - impl StateBus for FailingStateBus { - async fn set( - &self, - _scope: &str, - _key: &str, - _value: Value, - ) -> Result<(), iii_sdk::IIIError> { - Err(iii_sdk::IIIError::Runtime("kv unreachable".into())) - } - async fn get(&self, _scope: &str, _key: &str) -> Option { - None - } - async fn list_prefix(&self, _scope: &str, _prefix: &str) -> Vec { - Vec::new() - } - } - - #[tokio::test] - async fn handle_intercept_fails_closed_on_state_write_error() { - let bus = FailingStateBus; - let call = sample_call(); - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!( - reply["block"], - json!(true), - "state write failure must NOT fail-open" - ); - assert_eq!(reply["status"], json!("denied")); - assert_eq!(reply["denial"]["kind"], json!("state_error")); - assert_eq!( - reply["denial"]["detail"]["phase"], - json!("intercept_write_pending") - ); - // The underlying error message is present but its exact text is - // bus-implementation-specific; just check it's non-empty. 
- assert!( - reply["denial"]["detail"]["error"] - .as_str() - .map(|s| !s.is_empty()) - .unwrap_or(false), - "state_error detail must include error message: {reply}" - ); - assert_eq!(reply["function_id"], json!("shell::fs::write")); - } - - #[tokio::test] - async fn handle_intercept_stamps_session_id_into_pending_record() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let rec = bus - .get( - STATE_SCOPE, - &pending_key(&call.session_id, &call.function_call_id), - ) - .await - .expect("pending record"); - assert_eq!(rec["session_id"], json!(call.session_id)); - } - - #[test] - fn collect_timed_out_for_sweep_returns_expired_records_with_session_id() { - let mut rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 0, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s-42")); - let pile = vec![ - rec.clone(), - build_pending_record("tc-2", "shell::fs::write", &json!({}), 0, 999_999_999), - ]; - let out = collect_timed_out_for_sweep(&pile, 70_000); - assert_eq!(out.len(), 1); - let (key, flipped, session_id, call_id) = &out[0]; - assert_eq!(key, "s-42/tc-1"); - assert_eq!(session_id, "s-42"); - assert_eq!(call_id, "tc-1"); - assert_eq!(flipped["status"], json!("timed_out")); - // Timeout carries no Denial — status is self-describing. - assert!(flipped.get("denial").is_none()); - assert!(flipped.get("decision_reason").is_none()); - } - - #[test] - fn collect_timed_out_for_sweep_skips_records_without_session_id() { - // Legacy row (pre-session_id-stamping fix). The sweeper can't - // address the right session stream, so it must skip silently — - // lazy-flip on read will still pick it up. 
- let pile = vec![build_pending_record( - "tc-legacy", - "shell::fs::write", - &json!({}), - 0, - 60_000, - )]; - let out = collect_timed_out_for_sweep(&pile, 70_000); - assert!( - out.is_empty(), - "legacy record without session_id must not be swept" - ); - } - - #[test] - fn unverified_marker_targets_lists_unasserted_rules() { - let rules = vec![ - InterceptorRule { - function_id: "shell::exec".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: false, - }, - InterceptorRule { - function_id: "shell::exec_bg".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }, - InterceptorRule { - function_id: "no_marker::fn".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - assert_eq!(unverified_marker_targets(&rules), vec!["shell::exec"]); - } - - #[test] - fn unverified_marker_targets_empty_when_all_verified_or_marker_off() { - let rules = vec![ - InterceptorRule { - function_id: "shell::exec".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }, - InterceptorRule { - function_id: "other".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - assert!(unverified_marker_targets(&rules).is_empty()); - } - - // ── Boundary + edge-case tests prompted by cargo-mutants survivors ──── - // - // Each test corresponds to a mutant the test suite previously didn't - // catch. Test name → mutated line in src/lib.rs. - - #[tokio::test] - async fn handle_intercept_replay_of_terminal_record_returns_already_resolved() { - // mutant L331: replace `==` with `!=` in the replay defense — if - // flipped, terminal records would be overwritten with fresh pending. 
- let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let terminal = transition_record( - &build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - bus.set(STATE_SCOPE, &key, terminal).await.unwrap(); - - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("executed")); - // Replay reply: status carries the prior outcome, `replay` discriminator - // says we're echoing rather than denying afresh, and no `denial` is - // synthesized (the historical record is the source of truth). - assert_eq!(reply["replay"], json!("already_resolved")); - assert!(reply.get("denial").is_none()); - assert!(reply.get("reason").is_none()); - - // Crucial: the stored row is still `executed`, not overwritten. - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!(stored["status"], json!("executed")); - assert_eq!(stored["result"], json!({"ok": true})); - } - - #[tokio::test] - async fn handle_intercept_replay_of_pending_record_preserves_expires_at() { - // mutant L331: same branch, pending side. New pending must not bump - // the expires_at on the existing row. 
- let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let pending = build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ); - bus.set(STATE_SCOPE, &key, pending.clone()).await.unwrap(); - - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!( - stored["expires_at"], pending["expires_at"], - "replay must not bump expires_at on the live row" - ); - } - - #[tokio::test] - async fn handle_lookup_record_rejects_when_only_one_id_is_empty() { - // mutant L395: `||` → `&&` would let one-empty slip through. - let bus = InMemoryStateBus::new(); - let v1 = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "", "function_call_id": "c"}), - ) - .await; - assert!(v1.is_null()); - let v2 = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "function_call_id": ""}), - ) - .await; - assert!(v2.is_null()); - } - - #[tokio::test] - async fn handle_resolve_rejects_when_only_one_id_is_empty() { - // mutant L489: same `||` pattern in handle_resolve guard. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let r1 = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id": "", "function_call_id": "c", "decision": "allow"}), - 0, - ) - .await; - assert_eq!(r1["error"], json!("missing_id")); - let r2 = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id": "s", "function_call_id": "", "decision": "allow"}), - 0, - ) - .await; - assert_eq!(r2["error"], json!("missing_id")); - } - - #[tokio::test] - async fn handle_ack_delivered_returns_zero_when_only_one_field_is_empty() { - // mutant L677: two `||` operators in the empty-field guard. 
- let bus = InMemoryStateBus::new(); - // empty turn_id - let r1 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r1["stamped"], json!(0)); - // empty call_ids - let r2 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "t", "call_ids": []}), - ) - .await; - assert_eq!(r2["stamped"], json!(0)); - // empty session_id - let r3 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "", "turn_id": "t", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r3["stamped"], json!(0)); - } - - #[test] - fn collect_timed_out_for_sweep_rejects_record_missing_only_call_id() { - // mutant L423: `||` → `&&` would let one-empty records sweep. - let mut rec = build_pending_record("c1", "shell::fs::write", &json!({}), 0, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - rec.as_object_mut() - .unwrap() - .insert("function_call_id".into(), json!("")); - let out = collect_timed_out_for_sweep(&[rec], 70_000); - assert!(out.is_empty(), "empty function_call_id must skip sweep"); - } - - #[tokio::test] - async fn handle_intercept_replay_of_approved_record_preserves_state() { - // mutant L331:42 — replace `==` with `!=` on the "approved" side. - // The L331:19 mutation is killed by the *_pending_* test above; - // this one requires an approved record specifically. 
- let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let approved = transition_record( - &build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ), - "approved", - None, - None, - None, - ); - bus.set(STATE_SCOPE, &key, approved.clone()).await.unwrap(); - - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!( - stored["status"], - json!("approved"), - "replay of approved row must keep status; mutant would overwrite with pending" - ); - } - - #[tokio::test] - async fn handle_lookup_record_short_circuits_before_bus_get_on_one_empty_id() { - // mutant L395 — `||` → `&&` would let one-empty slip into bus.get. - // Seed a record at the address the mutant would compute (pending_key("", "c") = "/c"), - // so the mutant returns the seeded row while original code stays at Null. - let bus = InMemoryStateBus::new(); - bus.set(STATE_SCOPE, "/c", json!({"sentinel": "should_not_leak"})) - .await - .unwrap(); - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "", "function_call_id": "c"}), - ) - .await; - assert!( - v.is_null(), - "must short-circuit; the seeded sentinel must not leak through" - ); - } - - #[tokio::test] - async fn handle_ack_delivered_short_circuits_before_stamping_on_one_empty_field() { - // mutant L677 — two `||` operators. If either flips to `&&`, the - // function falls through and stamps a record even when a required - // field is empty. Seed a record so the stamping path can be - // observed. 
- let bus = InMemoryStateBus::new(); - let terminal = transition_record( - &build_pending_record("c", "shell::fs::write", &json!({}), 0, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - bus.set(STATE_SCOPE, &pending_key("s", "c"), terminal) - .await - .unwrap(); - - // empty turn_id — must NOT stamp the seeded record. - let r = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r["stamped"], json!(0)); - let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); - assert!( - stored.get("delivered_in_turn_id").is_none(), - "must not stamp when turn_id is empty; mutant would stamp" - ); - - // empty call_ids — same property. - let r = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "t", "call_ids": []}), - ) - .await; - assert_eq!(r["stamped"], json!(0)); - let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); - assert!( - stored.get("delivered_in_turn_id").is_none(), - "must not stamp when call_ids is empty" - ); - } - - #[test] - fn maybe_flip_timed_out_flips_at_exact_expires_at() { - // mutant L439: `<` → `<=` would not flip at the exact boundary. - let rec = build_pending_record("c1", "f", &json!({}), 0, 60_000); - // expires_at = 0 + 60_000 = 60_000. At now=60_000 the gate - // considers the record expired (strictly past or AT expiry). - assert!( - maybe_flip_timed_out(&rec, 60_000).is_some(), - "must flip at exactly expires_at" - ); - assert!( - maybe_flip_timed_out(&rec, 59_999).is_none(), - "must not flip one ms before expires_at" - ); - } - - // ── proptest: state-machine invariants ──────────────────────────────── - // - // Random sequences of intercept/resolve/sweep/ack/lazy-flip operations - // on a single (session, call) record. After every step we assert four - // invariants that the lifecycle is supposed to guarantee: - // - // I1. 
status ∈ {pending, approved, executed, failed, denied, timed_out}. - // Any other string is a corrupt record. - // I2. Once a terminal status is observed, the record never returns to - // `pending`. Terminal = executed | failed | denied | timed_out. - // I3. Every `pending` record carries an `expires_at: u64`. Without it - // the sweeper and lazy-flip paths can't classify the record. - // I4. `delivered_in_turn_id` is monotonic: once a non-null value is - // written it is never unset, never replaced with a different turn. - // - // If any future change can produce a sequence that violates one of - // these, proptest will shrink to the minimal failing sequence and - // surface it as a counterexample. - - use proptest::prelude::*; - - #[derive(Debug, Clone)] - enum Op { - InterceptRequired, - InterceptNotRequired, - ResolveAllow, - ResolveDeny, - AdvanceClockAndLazyFlip, // bumps clock past expires_at, hits list_undelivered - SweepSession, - AckDelivered, - } - - fn arb_op() -> impl Strategy { - prop_oneof![ - Just(Op::InterceptRequired), - Just(Op::InterceptNotRequired), - Just(Op::ResolveAllow), - Just(Op::ResolveDeny), - Just(Op::AdvanceClockAndLazyFlip), - Just(Op::SweepSession), - Just(Op::AckDelivered), - ] - } - - fn make_call(approval_required_self: bool) -> IncomingCall { - IncomingCall { - session_id: "s".into(), - function_call_id: "c".into(), - function_id: "test::write".into(), - args: json!({}), - approval_required: if approval_required_self { - vec!["test::write".into()] - } else { - vec!["other::fn".into()] - }, - event_id: "e".into(), - reply_stream: "r".into(), - } - } - - proptest! { - #![proptest_config(ProptestConfig { - cases: 256, - .. 
ProptestConfig::default() - })] - - #[test] - fn state_machine_invariants(ops in prop::collection::vec(arb_op(), 1..30)) { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("tokio runtime"); - - rt.block_on(async { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let session_id = "s"; - let call_id = "c"; - let timeout_ms: u64 = 60_000; - let mut now_ms: u64 = 1_000; - - let mut ever_terminal = false; - let mut last_delivered: Option = None; - - for op in &ops { - match op { - Op::InterceptRequired => { - let call = make_call(true); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; - } - Op::InterceptNotRequired => { - let call = make_call(false); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; - } - Op::ResolveAllow => { - let _ = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": session_id, - "function_call_id": call_id, - "decision": "allow", - }), - now_ms, - ) - .await; - } - Op::ResolveDeny => { - let _ = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": session_id, - "function_call_id": call_id, - "decision": "deny", - }), - now_ms, - ) - .await; - } - Op::AdvanceClockAndLazyFlip => { - now_ms = now_ms.saturating_add(timeout_ms + 1); - let _ = handle_list_undelivered( - &bus, STATE_SCOPE, - json!({ "session_id": session_id }), - now_ms, - ).await; - } - Op::SweepSession => { - let _ = handle_sweep_session( - &bus, STATE_SCOPE, - json!({ "session_id": session_id }), - ).await; - } - Op::AckDelivered => { - let _ = handle_ack_delivered( - &bus, STATE_SCOPE, - json!({ - "session_id": session_id, - "turn_id": format!("turn-{now_ms}"), - "call_ids": [call_id], - }), - ).await; - } - } - - // Assert invariants on whatever the record currently is. 
- let key = pending_key(session_id, call_id); - let Some(rec) = bus.get(STATE_SCOPE, &key).await else { - // No record yet (e.g. only InterceptNotRequired so far). Skip. - continue; - }; - - // I1: legal status - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - assert!( - matches!( - status, - "pending" | "approved" | "executed" | "failed" | "denied" | "timed_out" - ), - "I1 violated: illegal status {status:?} after ops {ops:?}; record={rec:?}" - ); - - // I2: no reverting terminal → pending - if matches!(status, "executed" | "failed" | "denied" | "timed_out") { - ever_terminal = true; - } - if ever_terminal { - assert!( - status != "pending", - "I2 violated: reverted to pending after terminal; ops={ops:?}; record={rec:?}" - ); - } - - // I3: pending records always have expires_at: u64 - if status == "pending" { - let exp = rec.get("expires_at").and_then(Value::as_u64); - assert!( - exp.is_some(), - "I3 violated: pending record missing expires_at; ops={ops:?}; record={rec:?}" - ); - } - - // I4: delivered_in_turn_id is monotonic — once set non-null, never unset / never replaced - let cur_delivered = rec - .get("delivered_in_turn_id") - .and_then(Value::as_str) - .map(str::to_string); - if let Some(prev) = &last_delivered { - match &cur_delivered { - Some(cur) => { - assert_eq!( - cur, prev, - "I4 violated: delivered_in_turn_id replaced {prev:?} → {cur:?}; ops={ops:?}" - ); - } - None => { - panic!( - "I4 violated: delivered_in_turn_id unset after being {prev:?}; ops={ops:?}; record={rec:?}" - ); - } - } - } - if cur_delivered.is_some() { - last_delivered = cur_delivered; - } - } - }); - } - } -} diff --git a/approval-gate/tests/common/mod.rs b/approval-gate/tests/common/mod.rs new file mode 100644 index 00000000..e72961fe --- /dev/null +++ b/approval-gate/tests/common/mod.rs @@ -0,0 +1,140 @@ +//! Shared fakes for the approval-gate test suite. +//! +//! Production code goes through `StateBus` and `FunctionExecutor` traits +//! 
exactly so unit tests can swap in these in-memory fakes. The trait +//! contracts are documented on the production types; the fakes here +//! mirror the wire shape closely enough that any handler behavior tied +//! to bus semantics surfaces in the tests. + +#![allow(dead_code)] // Individual test binaries pull in subsets of these. + +use std::collections::HashMap; +use std::sync::Mutex; + +use approval_gate::{FunctionExecutor, IncomingCall, StateBus}; +use serde_json::{json, Value}; + +/// Records every invocation and replays a canned response. By default +/// the fake returns `Ok({"ok": true})`; set [`Self::response`] to +/// override. +pub struct FakeExecutor { + pub calls: Mutex>, + pub response: Mutex>>, +} + +impl Default for FakeExecutor { + fn default() -> Self { + Self { + calls: Mutex::new(Vec::new()), + response: Mutex::new(None), + } + } +} + +#[async_trait::async_trait] +impl FunctionExecutor for FakeExecutor { + async fn invoke( + &self, + function_id: &str, + args: Value, + function_call_id: &str, + session_id: &str, + ) -> Result { + self.calls.lock().unwrap().push(( + function_id.to_string(), + args, + function_call_id.to_string(), + session_id.to_string(), + )); + self.response + .lock() + .unwrap() + .clone() + .unwrap_or_else(|| Ok(json!({ "ok": true }))) + } +} + +/// In-memory implementation of [`approval_gate::StateBus`]. Keys are +/// `"/"`; `list_prefix` filters by that flattened prefix +/// (same shape the production iii bus exposes). 
+pub struct InMemoryStateBus { + store: Mutex>, +} + +impl InMemoryStateBus { + pub fn new() -> Self { + Self { + store: Mutex::new(HashMap::new()), + } + } +} + +#[async_trait::async_trait] +impl StateBus for InMemoryStateBus { + async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError> { + self.store + .lock() + .unwrap() + .insert(format!("{scope}/{key}"), value); + Ok(()) + } + async fn get(&self, scope: &str, key: &str) -> Option { + self.store + .lock() + .unwrap() + .get(&format!("{scope}/{key}")) + .cloned() + } + async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { + let map = self.store.lock().unwrap(); + map.iter() + .filter(|(k, _)| k.starts_with(&format!("{scope}/{prefix}"))) + .map(|(_, v)| v.clone()) + .collect() + } +} + +/// `StateBus` whose `set` always errors. Used to exercise the gate's +/// fail-closed behavior on transient kv outages. +pub struct FailingStateBus; + +#[async_trait::async_trait] +impl StateBus for FailingStateBus { + async fn set( + &self, + _scope: &str, + _key: &str, + _value: Value, + ) -> Result<(), iii_sdk::IIIError> { + Err(iii_sdk::IIIError::Runtime("kv unreachable".into())) + } + async fn get(&self, _scope: &str, _key: &str) -> Option { + None + } + async fn list_prefix(&self, _scope: &str, _prefix: &str) -> Vec { + Vec::new() + } +} + +/// A canonical `shell::fs::write` call gated by the run's +/// `approval_required` list. Most handler tests use this so the only +/// thing they need to vary is the session/call id + whether the run +/// opts in. +pub fn sample_call() -> IncomingCall { + IncomingCall { + session_id: "s1".into(), + function_call_id: "tc-1".into(), + function_id: "shell::fs::write".into(), + args: json!({ "path": "/tmp/x" }), + approval_required: vec!["shell::fs::write".into()], + event_id: "evt-1".into(), + reply_stream: "rs-1".into(), + } +} + +/// Empty runtime ruleset for handler tests that don't care about the +/// cascade-on-`always` path. 
Each call freshly constructs the lock so +/// tests stay independent — there's no shared mutable state. +pub fn empty_policy_rules() -> std::sync::RwLock { + std::sync::RwLock::new(approval_gate::rules::Ruleset::new()) +} diff --git a/approval-gate/tests/delivery.rs b/approval-gate/tests/delivery.rs new file mode 100644 index 00000000..9ed3ca4c --- /dev/null +++ b/approval-gate/tests/delivery.rs @@ -0,0 +1,729 @@ +//! Delivery-tracking handlers: list_pending, list_undelivered, +//! ack_delivered, consume_undelivered, flush_delivered, sweep_session. + +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[tokio::test] + async fn handle_list_undelivered_caps_at_default_limit_and_reports_omitted() { + let bus = InMemoryStateBus::new(); + for i in 0..75 { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_000 + i as u64, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + assert_eq!(resp["entries"].as_array().unwrap().len(), 50); + assert_eq!(resp["omitted"].as_u64(), Some(25)); + } + + + #[tokio::test] + async fn handle_list_undelivered_honors_explicit_limit() { + let bus = InMemoryStateBus::new(); + for i in 0..10 { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_000 + i as u64, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), 
Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = handle_list_undelivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "limit": 3}), + 100_000, + ) + .await; + assert_eq!(resp["entries"].as_array().unwrap().len(), 3); + assert_eq!(resp["omitted"].as_u64(), Some(7)); + } + + + #[tokio::test] + async fn handle_list_undelivered_returns_oldest_first_by_resolved_at() { + let bus = InMemoryStateBus::new(); + for (i, ts) in [(0_u32, 5_000_u64), (1, 1_000), (2, 3_000)] { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ts, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = handle_list_undelivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "limit": 10}), + 100_000, + ) + .await; + let entries = resp["entries"].as_array().unwrap(); + let ids: Vec<&str> = entries + .iter() + .map(|e| e["function_call_id"].as_str().unwrap()) + .collect(); + assert_eq!(ids, vec!["c1", "c2", "c0"]); + } + + + #[tokio::test] + async fn handle_list_undelivered_omitted_is_zero_when_under_limit() { + let bus = InMemoryStateBus::new(); + let mut rec = transition_record_with_now( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_500, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) + .await + .unwrap(); + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + assert_eq!(resp["entries"].as_array().unwrap().len(), 1); + assert_eq!(resp["omitted"].as_u64(), Some(0)); + } + + + 
#[tokio::test] + async fn handle_consume_undelivered_stamps_returned_entries() { + let bus = InMemoryStateBus::new(); + for i in 0..3 { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_000 + i as u64, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = handle_consume_undelivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "turn_id": "turn-7", "limit": 10}), + 100_000, + ) + .await; + assert_eq!(resp["ok"], json!(true)); + assert_eq!(resp["entries"].as_array().unwrap().len(), 3); + assert_eq!(resp["omitted"].as_u64(), Some(0)); + let next = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + assert_eq!(next["entries"].as_array().unwrap().len(), 0); + } + + + #[tokio::test] + async fn handle_consume_undelivered_respects_limit_and_leaves_remainder() { + let bus = InMemoryStateBus::new(); + for i in 0..5 { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_000 + i as u64, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = handle_consume_undelivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "turn_id": "turn-7", "limit": 2}), + 100_000, + ) + .await; + assert_eq!(resp["entries"].as_array().unwrap().len(), 2); + assert_eq!(resp["omitted"].as_u64(), Some(3)); + let next = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + assert_eq!(next["entries"].as_array().unwrap().len(), 
3); + } + + + #[tokio::test] + async fn handle_consume_undelivered_missing_turn_id_returns_error() { + let bus = InMemoryStateBus::new(); + let resp = handle_consume_undelivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1"}), + 100_000, + ) + .await; + assert_eq!(resp["ok"], json!(false)); + assert_eq!(resp["error"], json!("missing_turn_id")); + } + + + #[tokio::test] + async fn handle_flush_delivered_stamps_all_unacked_terminals() { + let bus = InMemoryStateBus::new(); + for i in 0..5 { + let cid = format!("c{i}"); + let mut rec = transition_record_with_now( + &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_000 + i as u64, + ); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) + .await + .unwrap(); + } + let resp = handle_flush_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "turn_id": "manual-flush"}), + ) + .await; + assert_eq!(resp["ok"], json!(true)); + assert_eq!(resp["stamped"].as_u64(), Some(5)); + let next = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + assert_eq!(next["entries"].as_array().unwrap().len(), 0); + } + + + #[tokio::test] + async fn handle_flush_delivered_skips_pending_records() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + let resp = handle_flush_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "turn_id": "manual-flush"}), + ) + .await; + assert_eq!(resp["stamped"].as_u64(), Some(0)); + let still = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(still["status"].as_str(), Some("pending")); + assert!(still.get("delivered_in_turn_id").is_none()); + } + + + #[tokio::test] + async fn 
handle_flush_delivered_idempotent_on_already_stamped() { + let bus = InMemoryStateBus::new(); + let mut rec = transition_record_with_now( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + 1_500, + ); + { + let obj = rec.as_object_mut().unwrap(); + obj.insert( + "delivered_in_turn_id".into(), + Value::String("turn-prev".into()), + ); + obj.insert("session_id".into(), Value::String("s1".into())); + } + bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) + .await + .unwrap(); + let resp = handle_flush_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "turn_id": "manual-flush"}), + ) + .await; + assert_eq!(resp["stamped"].as_u64(), Some(0)); + let still = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(still["delivered_in_turn_id"].as_str(), Some("turn-prev")); + } + + + #[tokio::test] + async fn handle_list_undelivered_returns_terminal_records_with_no_delivered_stamp() { + let bus = InMemoryStateBus::new(); + let mut r1 = transition_record( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ); + r1.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", "c1"), r1) + .await + .unwrap(); + let mut r2 = transition_record( + &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), + "denied", + None, + None, + Some(Denial::UserCorrected { + feedback: "nope".into(), + }), + ); + r2.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) + .await + .unwrap(); + + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + let entries = resp["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 2); + 
assert_eq!(resp["omitted"].as_u64(), Some(0)); + } + + + #[tokio::test] + async fn handle_list_undelivered_excludes_pending_records() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; + assert_eq!(resp["entries"].as_array().unwrap().len(), 0); + } + + + #[tokio::test] + async fn handle_list_undelivered_empty_session_returns_empty() { + let bus = InMemoryStateBus::new(); + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; + assert_eq!(resp["entries"], json!([])); + } + + + #[tokio::test] + async fn handle_list_undelivered_excludes_records_stamped_with_delivered_turn_id() { + let bus = InMemoryStateBus::new(); + let mut rec = transition_record( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ); + { + let obj = rec.as_object_mut().unwrap(); + obj.insert( + "delivered_in_turn_id".into(), + Value::String("turn-prev".into()), + ); + obj.insert("session_id".into(), Value::String("s1".into())); + } + bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) + .await + .unwrap(); + + let mut r2 = transition_record( + &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ); + r2.as_object_mut() + .unwrap() + .insert("session_id".into(), Value::String("s1".into())); + bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) + .await + .unwrap(); + + let resp = + handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; + let entries = resp["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["function_call_id"], "c2"); + } + + + #[tokio::test] + async fn 
handle_list_undelivered_returns_empty_when_session_id_missing() { + let bus = InMemoryStateBus::new(); + let resp = handle_list_undelivered(&bus, STATE_SCOPE, json!({}), 1_500).await; + assert_eq!(resp["entries"], json!([])); + } + + + #[tokio::test] + async fn handle_ack_delivered_stamps_records_with_turn_id() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + transition_record( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ), + ) + .await + .unwrap(); + + let resp = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({ + "session_id": "s1", + "call_ids": ["c1"], + "turn_id": "turn-1", + }), + ) + .await; + assert_eq!(resp["ok"], json!(true)); + assert_eq!(resp["stamped"], json!(1)); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["delivered_in_turn_id"], "turn-1"); + } + + + #[tokio::test] + async fn handle_ack_delivered_is_idempotent_keeps_first_turn_id() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + transition_record( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ), + ) + .await + .unwrap(); + + let _ = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({ + "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-first", + }), + ) + .await; + let resp = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({ + "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-second", + }), + ) + .await; + assert_eq!(resp["stamped"], json!(0), "second ack must not re-stamp"); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["delivered_in_turn_id"], "turn-first"); + } + + + #[tokio::test] + async fn handle_ack_delivered_skips_unknown_call_ids_silently() { + let bus = 
InMemoryStateBus::new(); + let resp = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({ + "session_id": "s1", "call_ids": ["ghost"], "turn_id": "turn-1", + }), + ) + .await; + assert_eq!(resp["ok"], json!(true)); + assert_eq!(resp["stamped"], json!(0)); + } + + + #[tokio::test] + async fn list_pending_returns_only_pending_for_session() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "write", &json!({}), 0, 60_000), + ) + .await + .unwrap(); + let mut resolved = build_pending_record("tc-2", "write", &json!({}), 0, 60_000); + resolved["status"] = json!("allow"); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), resolved) + .await + .unwrap(); + bus.set( + STATE_SCOPE, + &pending_key("other", "tc-3"), + build_pending_record("tc-3", "write", &json!({}), 0, 60_000), + ) + .await + .unwrap(); + + let out = handle_list_pending(&bus, STATE_SCOPE, json!({ "session_id": "s1" })).await; + let items = out["pending"].as_array().unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0]["function_call_id"], "tc-1"); + } + + + #[tokio::test] + async fn handle_sweep_session_flips_pending_records_to_timed_out() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + + let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; + assert_eq!(resp["swept"], json!(1)); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["status"], "timed_out"); + // sweep_session no longer stamps a reason string — timed_out is + // self-describing per the Denial refactor. 
+ assert!(rec.get("denial").is_none()); + assert!(rec.get("decision_reason").is_none()); + } + + + #[tokio::test] + async fn handle_sweep_session_ignores_legacy_reason_payload_field() { + // Old callers may still pass `reason` — approval-gate accepts the + // payload but does not persist it. Behavior is identical to a + // bare {session_id} payload. + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + let resp = handle_sweep_session( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "reason": "run_stopped"}), + ) + .await; + assert_eq!(resp["swept"], json!(1)); + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["status"], "timed_out"); + assert!(rec.get("denial").is_none()); + } + + + #[tokio::test] + async fn handle_sweep_session_skips_non_pending_records() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "c1"), + transition_record( + &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ), + ) + .await + .unwrap(); + + let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; + assert_eq!(resp["swept"], json!(0)); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "c1")) + .await + .unwrap(); + assert_eq!(rec["status"], "executed"); + } + + + #[tokio::test] + async fn handle_sweep_session_returns_error_when_session_id_missing() { + let bus = InMemoryStateBus::new(); + let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({})).await; + assert_eq!(resp["ok"], json!(false)); + assert_eq!(resp["error"], "missing_session_id"); + assert_eq!(resp["swept"], json!(0)); + } + + + #[tokio::test] + async fn handle_ack_delivered_returns_zero_when_only_one_field_is_empty() { + // mutant L677: two `||` operators in the empty-field 
guard. + let bus = InMemoryStateBus::new(); + // empty turn_id + let r1 = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), + ) + .await; + assert_eq!(r1["stamped"], json!(0)); + // empty call_ids + let r2 = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s", "turn_id": "t", "call_ids": []}), + ) + .await; + assert_eq!(r2["stamped"], json!(0)); + // empty session_id + let r3 = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "", "turn_id": "t", "call_ids": ["c"]}), + ) + .await; + assert_eq!(r3["stamped"], json!(0)); + } + + + #[tokio::test] + async fn handle_ack_delivered_short_circuits_before_stamping_on_one_empty_field() { + // mutant L677 — two `||` operators. If either flips to `&&`, the + // function falls through and stamps a record even when a required + // field is empty. Seed a record so the stamping path can be + // observed. + let bus = InMemoryStateBus::new(); + let terminal = transition_record( + &build_pending_record("c", "shell::fs::write", &json!({}), 0, 60_000), + "executed", + Some(json!({"ok": true})), + None, + None, + ); + bus.set(STATE_SCOPE, &pending_key("s", "c"), terminal) + .await + .unwrap(); + + // empty turn_id — must NOT stamp the seeded record. + let r = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), + ) + .await; + assert_eq!(r["stamped"], json!(0)); + let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); + assert!( + stored.get("delivered_in_turn_id").is_none(), + "must not stamp when turn_id is empty; mutant would stamp" + ); + + // empty call_ids — same property. 
+ let r = handle_ack_delivered( + &bus, + STATE_SCOPE, + json!({"session_id": "s", "turn_id": "t", "call_ids": []}), + ) + .await; + assert_eq!(r["stamped"], json!(0)); + let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); + assert!( + stored.get("delivered_in_turn_id").is_none(), + "must not stamp when call_ids is empty" + ); + } diff --git a/approval-gate/tests/intercept.rs b/approval-gate/tests/intercept.rs new file mode 100644 index 00000000..fa5a0c35 --- /dev/null +++ b/approval-gate/tests/intercept.rs @@ -0,0 +1,218 @@ +//! handle_intercept — the gate's intercept-time decision path. +//! Covers replay handling, fail-closed on state-write errors, the +//! session_id stamping, and the force_pending classifier branch. + +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[tokio::test] + async fn handle_intercept_returns_pending_envelope_when_call_is_gated() { + let bus = InMemoryStateBus::new(); + let call = sample_call(); + let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + assert_eq!(reply["block"], json!(true)); + assert_eq!(reply["status"], json!("pending")); + assert_eq!(reply["call_id"], json!("tc-1")); + assert_eq!(reply["function_id"], json!("shell::fs::write")); + // Pending status is self-describing — no `reason` or `denial` field + // is emitted while the call is in-flight. 
+ assert!(reply.get("reason").is_none()); + assert!(reply.get("denial").is_none()); + } + + + #[tokio::test] + async fn handle_intercept_writes_pending_record_to_state() { + let bus = InMemoryStateBus::new(); + let call = sample_call(); + let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + let key = pending_key(&call.session_id, &call.function_call_id); + let rec = bus + .get(STATE_SCOPE, &key) + .await + .expect("pending record written"); + assert_eq!(rec["status"], "pending"); + assert_eq!(rec["function_call_id"], "tc-1"); + assert_eq!(rec["expires_at"], 61_000); + } + + + #[tokio::test] + async fn handle_intercept_passes_through_when_call_is_not_gated() { + let bus = InMemoryStateBus::new(); + let mut call = sample_call(); + call.approval_required = vec!["other".into()]; + let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + assert_eq!(reply["block"], json!(false)); + let key = pending_key(&call.session_id, &call.function_call_id); + assert!( + bus.get(STATE_SCOPE, &key).await.is_none(), + "no record written" + ); + } + + + #[tokio::test] + async fn handle_intercept_force_pending_writes_when_not_on_required_list() { + let bus = InMemoryStateBus::new(); + let mut call = sample_call(); + call.approval_required = vec!["other".into()]; + let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, true).await; + assert_eq!(reply["block"], json!(true)); + assert_eq!(reply["status"], json!("pending")); + let key = pending_key(&call.session_id, &call.function_call_id); + assert!(bus.get(STATE_SCOPE, &key).await.is_some()); + } + + + #[tokio::test] + async fn handle_intercept_fails_closed_on_state_write_error() { + let bus = FailingStateBus; + let call = sample_call(); + let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + assert_eq!( + reply["block"], + json!(true), + "state write failure must NOT fail-open" + ); + assert_eq!(reply["status"], json!("denied")); + 
assert_eq!(reply["denial"]["kind"], json!("state_error")); + assert_eq!( + reply["denial"]["detail"]["phase"], + json!("intercept_write_pending") + ); + // The underlying error message is present but its exact text is + // bus-implementation-specific; just check it's non-empty. + assert!( + reply["denial"]["detail"]["error"] + .as_str() + .map(|s| !s.is_empty()) + .unwrap_or(false), + "state_error detail must include error message: {reply}" + ); + assert_eq!(reply["function_id"], json!("shell::fs::write")); + } + + + #[tokio::test] + async fn handle_intercept_stamps_session_id_into_pending_record() { + let bus = InMemoryStateBus::new(); + let call = sample_call(); + let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + let rec = bus + .get( + STATE_SCOPE, + &pending_key(&call.session_id, &call.function_call_id), + ) + .await + .expect("pending record"); + assert_eq!(rec["session_id"], json!(call.session_id)); + } + + + // ── Boundary + edge-case tests prompted by cargo-mutants survivors ──── + // + // Each test corresponds to a mutant the test suite previously didn't + // catch. Test name → mutated line in src/lib.rs. + + #[tokio::test] + async fn handle_intercept_replay_of_terminal_record_returns_already_resolved() { + // mutant L331: replace `==` with `!=` in the replay defense — if + // flipped, terminal records would be overwritten with fresh pending. 
+ let bus = InMemoryStateBus::new(); + let call = sample_call(); + let key = pending_key(&call.session_id, &call.function_call_id); + let terminal = transition_record( + &build_pending_record( + &call.function_call_id, + &call.function_id, + &call.args, + 0, + 60_000, + ), + "executed", + Some(json!({"ok": true})), + None, + None, + ); + bus.set(STATE_SCOPE, &key, terminal).await.unwrap(); + + let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + assert_eq!(reply["block"], json!(true)); + assert_eq!(reply["status"], json!("executed")); + // Replay reply: status carries the prior outcome, `replay` discriminator + // says we're echoing rather than denying afresh, and no `denial` is + // synthesized (the historical record is the source of truth). + assert_eq!(reply["replay"], json!("already_resolved")); + assert!(reply.get("denial").is_none()); + assert!(reply.get("reason").is_none()); + + // Crucial: the stored row is still `executed`, not overwritten. + let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); + assert_eq!(stored["status"], json!("executed")); + assert_eq!(stored["result"], json!({"ok": true})); + } + + + #[tokio::test] + async fn handle_intercept_replay_of_pending_record_preserves_expires_at() { + // mutant L331: same branch, pending side. New pending must not bump + // the expires_at on the existing row. 
+ let bus = InMemoryStateBus::new(); + let call = sample_call(); + let key = pending_key(&call.session_id, &call.function_call_id); + let pending = build_pending_record( + &call.function_call_id, + &call.function_id, + &call.args, + 0, + 60_000, + ); + bus.set(STATE_SCOPE, &key, pending.clone()).await.unwrap(); + + let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; + let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); + assert_eq!( + stored["expires_at"], pending["expires_at"], + "replay must not bump expires_at on the live row" + ); + } + + + #[tokio::test] + async fn handle_intercept_replay_of_approved_record_preserves_state() { + // mutant L331:42 — replace `==` with `!=` on the "approved" side. + // The L331:19 mutation is killed by the *_pending_* test above; + // this one requires an approved record specifically. + let bus = InMemoryStateBus::new(); + let call = sample_call(); + let key = pending_key(&call.session_id, &call.function_call_id); + let approved = transition_record( + &build_pending_record( + &call.function_call_id, + &call.function_id, + &call.args, + 0, + 60_000, + ), + "approved", + None, + None, + None, + ); + bus.set(STATE_SCOPE, &key, approved.clone()).await.unwrap(); + + let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; + let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); + assert_eq!( + stored["status"], + json!("approved"), + "replay of approved row must keep status; mutant would overwrite with pending" + ); + } diff --git a/approval-gate/tests/lifecycle.rs b/approval-gate/tests/lifecycle.rs new file mode 100644 index 00000000..0dcaff1a --- /dev/null +++ b/approval-gate/tests/lifecycle.rs @@ -0,0 +1,271 @@ +//! Record-lifecycle helpers: build_pending_record, transition_record, +//! maybe_flip_timed_out, collect_timed_out_for_sweep, plus the small +//! is_terminal_status / pending_key utilities. 
+ +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[test] + fn maybe_flip_timed_out_returns_some_when_pending_and_expired() { + let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); + let flipped = maybe_flip_timed_out(&rec, 70_000).expect("should flip"); + assert_eq!(flipped["status"], "timed_out"); + // Timeout carries no Denial — the status alone explains the outcome. + assert!(flipped.get("denial").is_none()); + assert!(flipped.get("decision_reason").is_none()); + } + + + #[test] + fn maybe_flip_timed_out_returns_none_when_pending_and_not_expired() { + let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); + assert!(maybe_flip_timed_out(&rec, 60_000).is_none()); + assert!(maybe_flip_timed_out(&rec, 1_500).is_none()); + } + + + #[test] + fn maybe_flip_timed_out_returns_none_when_not_pending() { + let rec = json!({ + "function_call_id": "tc-1", + "status": "executed", + "expires_at": 1_000_u64, + }); + assert!(maybe_flip_timed_out(&rec, 999_999_999).is_none()); + } + + + #[test] + fn transition_record_stamps_resolved_at_for_terminal_status() { + let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); + let rec = transition_record_with_now( + &base, + "executed", + Some(json!({"ok": true})), + None, + None, + 12_345, + ); + assert_eq!(rec["resolved_at"].as_u64(), Some(12_345)); + } + + + #[test] + fn transition_record_preserves_existing_resolved_at_on_relift() { + let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); + let first = transition_record_with_now( + &base, + "executed", + Some(json!({"ok": true})), + None, + None, + 12_345, + ); + let second = transition_record_with_now( + &first, + "executed", + Some(json!({"ok": true})), + None, + None, + 99_999, + ); + 
assert_eq!(second["resolved_at"].as_u64(), Some(12_345)); + } + + + #[test] + fn transition_record_does_not_stamp_resolved_at_for_intermediate_status() { + let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); + let rec = + transition_record_with_now(&base, "approved", None, None, None, 12_345); + assert!(rec.get("resolved_at").is_none()); + } + + + #[test] + fn is_terminal_status_returns_true_for_terminal_states() { + assert!(is_terminal_status("executed")); + assert!(is_terminal_status("failed")); + assert!(is_terminal_status("denied")); + assert!(is_terminal_status("timed_out")); + } + + + #[test] + fn is_terminal_status_returns_false_for_in_progress_states() { + assert!(!is_terminal_status("pending")); + assert!(!is_terminal_status("approved")); + assert!(!is_terminal_status("anything_else")); + assert!(!is_terminal_status("")); + } + + + #[test] + fn pending_key_includes_session_and_tool_call_id() { + assert_eq!(pending_key("s1", "tc-1"), "s1/tc-1"); + } + + + #[test] + fn build_pending_record_sets_status_and_expiry() { + let now = 1_000_000; + let rec = build_pending_record("tc-1", "write", &json!({"x": 1}), now, 60_000); + assert_eq!(rec["status"], "pending"); + assert_eq!(rec["function_call_id"], "tc-1"); + assert_eq!(rec["expires_at"], 1_060_000); + } + + + #[test] + fn transition_record_to_executed_attaches_result() { + let base = build_pending_record( + "tc-1", + "shell::fs::write", + &json!({"path":"/a"}), + 1_000, + 60_000, + ); + let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); + assert_eq!(rec["status"], "executed"); + assert_eq!(rec["result"], json!({"ok": true})); + assert!(rec.get("error").is_none() || rec["error"].is_null()); + assert_eq!(rec["function_call_id"], "tc-1"); + assert_eq!(rec["function_id"], "shell::fs::write"); + } + + + #[test] + fn transition_record_to_failed_attaches_error() { + let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 
60_000); + let rec = transition_record(&base, "failed", None, Some("EACCES".into()), None); + assert_eq!(rec["status"], "failed"); + assert_eq!(rec["error"], "EACCES"); + assert!(rec.get("result").is_none() || rec["result"].is_null()); + } + + + #[test] + fn transition_record_to_denied_attaches_structured_denial() { + let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); + let rec = transition_record( + &base, + "denied", + None, + None, + Some(Denial::Policy { + classifier_reason: "not authorized".into(), + classifier_fn: "shell::classify_argv".into(), + }), + ); + assert_eq!(rec["status"], "denied"); + assert_eq!(rec["denial"]["kind"], "policy"); + assert_eq!(rec["denial"]["detail"]["classifier_reason"], "not authorized"); + assert!( + rec.get("decision_reason").is_none(), + "legacy decision_reason must not be written: {rec}" + ); + } + + + #[test] + fn transition_record_to_timed_out_carries_no_denial() { + // Timeout status is self-describing — no Denial attached. 
+ let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); + let rec = transition_record(&base, "timed_out", None, None, None); + assert_eq!(rec["status"], "timed_out"); + assert!(rec.get("denial").is_none()); + assert!(rec.get("decision_reason").is_none()); + } + + + #[test] + fn transition_record_preserves_delivered_in_turn_id_when_set() { + let mut base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); + base.as_object_mut().unwrap().insert( + "delivered_in_turn_id".into(), + Value::String("turn-X".into()), + ); + let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); + assert_eq!(rec["delivered_in_turn_id"], "turn-X"); + } + + + #[test] + fn collect_timed_out_for_sweep_returns_expired_records_with_session_id() { + let mut rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 0, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s-42")); + let pile = vec![ + rec.clone(), + build_pending_record("tc-2", "shell::fs::write", &json!({}), 0, 999_999_999), + ]; + let out = collect_timed_out_for_sweep(&pile, 70_000); + assert_eq!(out.len(), 1); + let (key, flipped, session_id, call_id) = &out[0]; + assert_eq!(key, "s-42/tc-1"); + assert_eq!(session_id, "s-42"); + assert_eq!(call_id, "tc-1"); + assert_eq!(flipped["status"], json!("timed_out")); + // Timeout carries no Denial — status is self-describing. + assert!(flipped.get("denial").is_none()); + assert!(flipped.get("decision_reason").is_none()); + } + + + #[test] + fn collect_timed_out_for_sweep_skips_records_without_session_id() { + // Legacy row (pre-session_id-stamping fix). The sweeper can't + // address the right session stream, so it must skip silently — + // lazy-flip on read will still pick it up. 
+ let pile = vec![build_pending_record( + "tc-legacy", + "shell::fs::write", + &json!({}), + 0, + 60_000, + )]; + let out = collect_timed_out_for_sweep(&pile, 70_000); + assert!( + out.is_empty(), + "legacy record without session_id must not be swept" + ); + } + + + #[test] + fn collect_timed_out_for_sweep_rejects_record_missing_only_call_id() { + // mutant L423: `||` → `&&` would let one-empty records sweep. + let mut rec = build_pending_record("c1", "shell::fs::write", &json!({}), 0, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + rec.as_object_mut() + .unwrap() + .insert("function_call_id".into(), json!("")); + let out = collect_timed_out_for_sweep(&[rec], 70_000); + assert!(out.is_empty(), "empty function_call_id must skip sweep"); + } + + + #[test] + fn maybe_flip_timed_out_flips_at_exact_expires_at() { + // mutant L439: `<` → `<=` would not flip at the exact boundary. + let rec = build_pending_record("c1", "f", &json!({}), 0, 60_000); + // expires_at = 0 + 60_000 = 60_000. At now=60_000 the gate + // considers the record expired (strictly past or AT expiry). + assert!( + maybe_flip_timed_out(&rec, 60_000).is_some(), + "must flip at exactly expires_at" + ); + assert!( + maybe_flip_timed_out(&rec, 59_999).is_none(), + "must not flip one ms before expires_at" + ); + } diff --git a/approval-gate/tests/misc.rs b/approval-gate/tests/misc.rs new file mode 100644 index 00000000..2cc96f69 --- /dev/null +++ b/approval-gate/tests/misc.rs @@ -0,0 +1,87 @@ +//! Miscellaneous: function-id constants, marker-target validation, +//! and the FakeExecutor recording-of-calls smoke test. 
+ +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[test] + fn fn_constants_match_spec_strings() { + assert_eq!(FN_RESOLVE, "approval::resolve"); + assert_eq!(FN_LIST_PENDING, "approval::list_pending"); + assert_eq!(FN_LIST_UNDELIVERED, "approval::list_undelivered"); + assert_eq!(FN_ACK_DELIVERED, "approval::ack_delivered"); + assert_eq!(FN_LOOKUP_RECORD, "approval::lookup_record"); + } + + + #[tokio::test] + async fn fake_executor_records_calls() { + let exec = FakeExecutor::default(); + let out = exec + .invoke("shell::fs::write", json!({"x": 1}), "cid", "sid") + .await + .unwrap(); + assert_eq!(out, json!({"ok": true})); + let calls = exec.calls.lock().unwrap().clone(); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].0, "shell::fs::write"); + assert_eq!(calls[0].2, "cid"); + assert_eq!(calls[0].3, "sid"); + } + + + #[test] + fn unverified_marker_targets_lists_unasserted_rules() { + let rules = vec![ + InterceptorRule { + function_id: "shell::exec".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: true, + marker_target_verified: false, + }, + InterceptorRule { + function_id: "shell::exec_bg".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: true, + marker_target_verified: true, + }, + InterceptorRule { + function_id: "no_marker::fn".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }, + ]; + assert_eq!(unverified_marker_targets(&rules), vec!["shell::exec"]); + } + + + #[test] + fn unverified_marker_targets_empty_when_all_verified_or_marker_off() { + let rules = vec![ + InterceptorRule { + function_id: "shell::exec".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: true, + marker_target_verified: true, + }, + InterceptorRule { + 
function_id: "other".into(), + classifier: None, + classifier_timeout_ms: 2000, + inject_approval_marker: false, + marker_target_verified: false, + }, + ]; + assert!(unverified_marker_targets(&rules).is_empty()); + } diff --git a/approval-gate/tests/resolve.rs b/approval-gate/tests/resolve.rs new file mode 100644 index 00000000..537c44ae --- /dev/null +++ b/approval-gate/tests/resolve.rs @@ -0,0 +1,673 @@ +//! Approval-resolve flow: handle_resolve, the cascade-on-`always` sweep, +//! and handle_lookup_record. Uses an InMemoryStateBus + FakeExecutor. + +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[tokio::test] + async fn handle_resolve_on_expired_pending_flips_to_timed_out_and_ignores_decision() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), + 70_000, + ) + .await; + assert_eq!(resp["ok"], json!(false)); + assert_eq!(resp["error"], "timed_out"); + + assert!(exec.calls.lock().unwrap().is_empty()); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(rec["status"], "timed_out"); + } + + + #[tokio::test] + async fn handle_lookup_record_returns_null_when_missing() { + let bus = InMemoryStateBus::new(); + let v = handle_lookup_record( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "function_call_id": "c1"}), + ) + .await; + assert!(v.is_null()); + } + + + #[tokio::test] + async fn handle_lookup_record_returns_record_when_present() { + let bus = InMemoryStateBus::new(); + let call = sample_call(); + let _ = 
handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; + let v = handle_lookup_record( + &bus, + STATE_SCOPE, + json!({"session_id": "s1", "function_call_id": "tc-1"}), + ) + .await; + assert_eq!(v["status"], json!("pending")); + assert_eq!(v["function_id"], json!("shell::fs::write")); + } + + + #[tokio::test] + async fn handle_resolve_allow_invokes_function_and_records_executed() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record( + "tc-1", + "shell::fs::write", + &json!({"path":"/a"}), + 1_000, + 60_000, + ), + ) + .await + .unwrap(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], json!(true)); + + let calls = exec.calls.lock().unwrap().clone(); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].0, "shell::fs::write"); + assert_eq!(calls[0].1, json!({"path":"/a"})); + assert_eq!(calls[0].2, "tc-1"); + assert_eq!(calls[0].3, "s1"); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(rec["status"], "executed"); + assert_eq!(rec["result"], json!({"ok": true})); + } + + + #[tokio::test] + async fn allow_without_always_does_not_cascade() { + // Two pending shell::exec calls in the same session. Resolving + // the first with allow (always=false) must NOT touch the second. 
+ let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for cid in ["tc-1", "tc-2"] { + let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert!( + resp.get("cascaded").is_none(), + "cascaded field must be omitted when always was not set: {resp}" + ); + let other = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-2")) + .await + .unwrap(); + assert_eq!(other["status"], "pending"); + assert_eq!(rules.read().unwrap().len(), 0, "rule must not be pushed"); + } + + + #[tokio::test] + async fn allow_with_always_pushes_rule_and_cascades_same_session_pending() { + // Three pending calls in session s1: two shell::exec, one + // shell::fs::write. Resolving the first shell::exec with + // always=true must: + // 1. Push an Allow rule for shell::exec + // 2. Auto-resolve the other shell::exec pending in this session + // 3. 
Leave the shell::fs::write pending untouched + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for (cid, fn_id) in [ + ("tc-1", "shell::exec"), + ("tc-2", "shell::exec"), + ("tc-3", "shell::fs::write"), + ] { + let mut rec = build_pending_record(cid, fn_id, &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert_eq!( + resp["cascaded"], json!(1), + "tc-2 should cascade; tc-1 originator excluded; tc-3 not matched" + ); + + // The Allow rule for shell::exec is now in the shared ruleset. + let pushed = rules.read().unwrap(); + assert_eq!(pushed.len(), 1); + assert_eq!(pushed[0].permission, "shell::exec"); + assert_eq!(pushed[0].action, rules::Action::Allow); + drop(pushed); + + // Originator and cascaded record both transitioned to executed. + let r1 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + let r2 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-2")) + .await + .unwrap(); + let r3 = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-3")) + .await + .unwrap(); + assert_eq!(r1["status"], "executed"); + assert_eq!(r2["status"], "executed"); + assert_eq!( + r3["status"], "pending", + "non-matching function_id must stay pending: {r3}" + ); + + // Executor was invoked twice: originator + cascaded. + assert_eq!(exec.calls.lock().unwrap().len(), 2); + } + + + #[tokio::test] + async fn cascade_does_not_cross_session_boundary() { + // tc-1 in session s1, tc-2 in session s2 — both shell::exec. + // Resolving s1/tc-1 with always must not touch s2/tc-2. 
+ let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + for (session, cid) in [("s1", "tc-1"), ("s2", "tc-2")] { + let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!(session)); + bus.set(STATE_SCOPE, &pending_key(session, cid), rec) + .await + .unwrap(); + } + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + assert!( + resp.get("cascaded").is_none() || resp["cascaded"] == json!(0), + "no record in s1 to cascade onto; tc-2 in s2 must NOT be touched: {resp}" + ); + + let other_session = bus + .get(STATE_SCOPE, &pending_key("s2", "tc-2")) + .await + .unwrap(); + assert_eq!(other_session["status"], "pending"); + assert_eq!( + exec.calls.lock().unwrap().len(), + 1, + "only the originator should have been invoked" + ); + } + + + #[tokio::test] + async fn cascade_skips_originator_record() { + // Single pending record. always=true must not double-resolve it. + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let mut rec = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); + rec.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) + .await + .unwrap(); + let rules = empty_policy_rules(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + // Originator counts under the existing allow path, not the cascade. 
+ assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); + assert_eq!(exec.calls.lock().unwrap().len(), 1); + } + + + #[tokio::test] + async fn cascade_skips_already_resolved_records_in_session() { + // Two records in s1: tc-1 pending, tc-2 already terminal. The + // cascade must skip tc-2. + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let mut r1 = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); + r1.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), r1) + .await + .unwrap(); + let mut r2 = build_pending_record("tc-2", "shell::exec", &json!({}), 1_000, 60_000); + r2.as_object_mut() + .unwrap() + .insert("session_id".into(), json!("s1")); + let r2_done = transition_record(&r2, "executed", Some(json!({"ok": true})), None, None); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), r2_done) + .await + .unwrap(); + + let rules = empty_policy_rules(); + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &rules, + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "allow", + "always": true, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], true); + // tc-2 is terminal — not pending — so cascade skips it. 
+ assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); + } + + + #[tokio::test] + async fn handle_resolve_deny_does_not_invoke_function() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "deny", + "denial": { + "kind": "user_corrected", + "detail": { "feedback": "not authorized" } + }, + }), + 1_500, + ) + .await; + assert_eq!(resp["ok"], json!(true)); + + assert!(exec.calls.lock().unwrap().is_empty()); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(rec["status"], "denied"); + assert_eq!(rec["denial"]["kind"], "user_corrected"); + assert_eq!(rec["denial"]["detail"]["feedback"], "not authorized"); + } + + + #[tokio::test] + async fn handle_resolve_allow_records_failed_when_function_errors() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + *exec.response.lock().unwrap() = Some(Err("EACCES".into())); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), + ) + .await + .unwrap(); + + let resp = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), + 1_500, + ) + .await; + assert_eq!(resp["ok"], json!(true)); + + let rec = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(rec["status"], "failed"); + assert_eq!(rec["error"], "EACCES"); + } + + + #[tokio::test] + async fn resolve_flips_status_when_pending() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + 
build_pending_record("tc-1", "write", &json!({}), 0, 60_000), + ) + .await + .unwrap(); + + let exec = FakeExecutor::default(); + let out = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "function_call_id": "tc-1", + "session_id": "s1", + "decision": "allow", + }), + 1_500, + ) + .await; + + assert_eq!(out["ok"], true); + let stored = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(stored["status"], "executed"); + } + + + #[tokio::test] + async fn resolve_accepts_legacy_tool_call_id_field() { + let bus = InMemoryStateBus::new(); + bus.set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "write", &json!({}), 0, 60_000), + ) + .await + .unwrap(); + + let exec = FakeExecutor::default(); + let out = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "tool_call_id": "tc-1", + "session_id": "s1", + "decision": "allow", + }), + 1_500, + ) + .await; + + assert_eq!(out["ok"], true); + } + + + #[tokio::test] + async fn resolve_rejects_already_resolved_entry() { + let bus = InMemoryStateBus::new(); + let mut rec = build_pending_record("tc-1", "write", &json!({}), 0, 60_000); + rec["status"] = json!("allow"); + bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) + .await + .unwrap(); + + let exec = FakeExecutor::default(); + let out = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({"function_call_id": "tc-1", "session_id": "s1", "decision": "deny"}), + 1_500, + ) + .await; + assert_eq!(out["ok"], false); + assert_eq!(out["error"], "already_resolved"); + } + + + #[tokio::test] + async fn resolve_deny_without_denial_defaults_to_user_rejected() { + let bus = InMemoryStateBus::new(); + let _ = bus + .set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "write", &json!({}), 0, 60_000), + ) + .await; + + let exec = FakeExecutor::default(); + let out = handle_resolve( + &bus, + &exec, 
+ STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "deny", + }), + 1_500, + ) + .await; + assert_eq!(out["ok"], true); + + let stored = bus + .get(STATE_SCOPE, &pending_key("s1", "tc-1")) + .await + .unwrap(); + assert_eq!(stored["status"], "denied"); + assert_eq!(stored["denial"]["kind"], "user_rejected"); + } + + + #[tokio::test] + async fn resolve_deny_rejects_malformed_denial() { + let bus = InMemoryStateBus::new(); + let _ = bus + .set( + STATE_SCOPE, + &pending_key("s1", "tc-1"), + build_pending_record("tc-1", "write", &json!({}), 0, 60_000), + ) + .await; + + let exec = FakeExecutor::default(); + let out = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": "s1", + "function_call_id": "tc-1", + "decision": "deny", + "denial": { "kind": "not_a_real_kind" }, + }), + 1_500, + ) + .await; + assert_eq!(out["ok"], false); + assert_eq!(out["error"], "bad_denial"); + } + + + #[tokio::test] + async fn handle_lookup_record_rejects_when_only_one_id_is_empty() { + // mutant L395: `||` → `&&` would let one-empty slip through. + let bus = InMemoryStateBus::new(); + let v1 = handle_lookup_record( + &bus, + STATE_SCOPE, + json!({"session_id": "", "function_call_id": "c"}), + ) + .await; + assert!(v1.is_null()); + let v2 = handle_lookup_record( + &bus, + STATE_SCOPE, + json!({"session_id": "s", "function_call_id": ""}), + ) + .await; + assert!(v2.is_null()); + } + + + #[tokio::test] + async fn handle_resolve_rejects_when_only_one_id_is_empty() { + // mutant L489: same `||` pattern in handle_resolve guard. 
+ let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let r1 = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({"session_id": "", "function_call_id": "c", "decision": "allow"}), + 0, + ) + .await; + assert_eq!(r1["error"], json!("missing_id")); + let r2 = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({"session_id": "s", "function_call_id": "", "decision": "allow"}), + 0, + ) + .await; + assert_eq!(r2["error"], json!("missing_id")); + } + + + #[tokio::test] + async fn handle_lookup_record_short_circuits_before_bus_get_on_one_empty_id() { + // mutant L395 — `||` → `&&` would let one-empty slip into bus.get. + // Seed a record at the address the mutant would compute (pending_key("", "c") = "/c"), + // so the mutant returns the seeded row while original code stays at Null. + let bus = InMemoryStateBus::new(); + bus.set(STATE_SCOPE, "/c", json!({"sentinel": "should_not_leak"})) + .await + .unwrap(); + let v = handle_lookup_record( + &bus, + STATE_SCOPE, + json!({"session_id": "", "function_call_id": "c"}), + ) + .await; + assert!( + v.is_null(), + "must short-circuit; the seeded sentinel must not leak through" + ); + } diff --git a/approval-gate/tests/state_machine.rs b/approval-gate/tests/state_machine.rs new file mode 100644 index 00000000..d42d2d57 --- /dev/null +++ b/approval-gate/tests/state_machine.rs @@ -0,0 +1,209 @@ +//! Property-based state-machine invariants. Drives the gate through +//! random sequences of (intercept, resolve, sweep, ack, ...) ops and +//! asserts the four invariants documented in the test body. 
+ + +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, FakeExecutor, InMemoryStateBus}; +use proptest::prelude::*; +use serde_json::{json, Value}; + + + +#[derive(Debug, Clone)] +enum Op { + InterceptRequired, + InterceptNotRequired, + ResolveAllow, + ResolveDeny, + AdvanceClockAndLazyFlip, + SweepSession, + AckDelivered, +} + +fn arb_op() -> impl Strategy { + prop_oneof![ + Just(Op::InterceptRequired), + Just(Op::InterceptNotRequired), + Just(Op::ResolveAllow), + Just(Op::ResolveDeny), + Just(Op::AdvanceClockAndLazyFlip), + Just(Op::SweepSession), + Just(Op::AckDelivered), + ] +} + +fn make_call(approval_required_self: bool) -> IncomingCall { + IncomingCall { + session_id: "s".into(), + function_call_id: "c".into(), + function_id: "test::write".into(), + args: json!({}), + approval_required: if approval_required_self { + vec!["test::write".into()] + } else { + vec!["other::fn".into()] + }, + event_id: "e".into(), + reply_stream: "r".into(), + } +} + + + + proptest! { + #![proptest_config(ProptestConfig { + cases: 256, + .. 
ProptestConfig::default() + })] + + #[test] + fn state_machine_invariants(ops in prop::collection::vec(arb_op(), 1..30)) { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio runtime"); + + rt.block_on(async { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let session_id = "s"; + let call_id = "c"; + let timeout_ms: u64 = 60_000; + let mut now_ms: u64 = 1_000; + + let mut ever_terminal = false; + let mut last_delivered: Option = None; + + for op in &ops { + match op { + Op::InterceptRequired => { + let call = make_call(true); + let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; + } + Op::InterceptNotRequired => { + let call = make_call(false); + let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; + } + Op::ResolveAllow => { + let _ = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": session_id, + "function_call_id": call_id, + "decision": "allow", + }), + now_ms, + ) + .await; + } + Op::ResolveDeny => { + let _ = handle_resolve( + &bus, + &exec, + STATE_SCOPE, + &empty_policy_rules(), + json!({ + "session_id": session_id, + "function_call_id": call_id, + "decision": "deny", + }), + now_ms, + ) + .await; + } + Op::AdvanceClockAndLazyFlip => { + now_ms = now_ms.saturating_add(timeout_ms + 1); + let _ = handle_list_undelivered( + &bus, STATE_SCOPE, + json!({ "session_id": session_id }), + now_ms, + ).await; + } + Op::SweepSession => { + let _ = handle_sweep_session( + &bus, STATE_SCOPE, + json!({ "session_id": session_id }), + ).await; + } + Op::AckDelivered => { + let _ = handle_ack_delivered( + &bus, STATE_SCOPE, + json!({ + "session_id": session_id, + "turn_id": format!("turn-{now_ms}"), + "call_ids": [call_id], + }), + ).await; + } + } + + // Assert invariants on whatever the record currently is. 
+ let key = pending_key(session_id, call_id); + let Some(rec) = bus.get(STATE_SCOPE, &key).await else { + // No record yet (e.g. only InterceptNotRequired so far). Skip. + continue; + }; + + // I1: legal status + let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); + assert!( + matches!( + status, + "pending" | "approved" | "executed" | "failed" | "denied" | "timed_out" + ), + "I1 violated: illegal status {status:?} after ops {ops:?}; record={rec:?}" + ); + + // I2: no reverting terminal → pending + if matches!(status, "executed" | "failed" | "denied" | "timed_out") { + ever_terminal = true; + } + if ever_terminal { + assert!( + status != "pending", + "I2 violated: reverted to pending after terminal; ops={ops:?}; record={rec:?}" + ); + } + + // I3: pending records always have expires_at: u64 + if status == "pending" { + let exp = rec.get("expires_at").and_then(Value::as_u64); + assert!( + exp.is_some(), + "I3 violated: pending record missing expires_at; ops={ops:?}; record={rec:?}" + ); + } + + // I4: delivered_in_turn_id is monotonic — once set non-null, never unset / never replaced + let cur_delivered = rec + .get("delivered_in_turn_id") + .and_then(Value::as_str) + .map(str::to_string); + if let Some(prev) = &last_delivered { + match &cur_delivered { + Some(cur) => { + assert_eq!( + cur, prev, + "I4 violated: delivered_in_turn_id replaced {prev:?} → {cur:?}; ops={ops:?}" + ); + } + None => { + panic!( + "I4 violated: delivered_in_turn_id unset after being {prev:?}; ops={ops:?}; record={rec:?}" + ); + } + } + } + if cur_delivered.is_some() { + last_delivered = cur_delivered; + } + } + }); + } + } diff --git a/approval-gate/tests/wire.rs b/approval-gate/tests/wire.rs new file mode 100644 index 00000000..4cee9e04 --- /dev/null +++ b/approval-gate/tests/wire.rs @@ -0,0 +1,156 @@ +//! Wire-shape helpers: extract_call envelope parsing, block_reply_for +//! hook reply, IncomingCall::requires_approval semantics. 
+ +mod common; + +use approval_gate::*; +use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use serde_json::{json, Value}; +use std::sync::Mutex; + + + + #[test] + fn extract_call_reads_session_id_and_function_call_from_envelope() { + let envelope = json!({ + "event_id": "evt-1", + "reply_stream": "rs-1", + "payload": { + "function_call": { "id": "tc-1", "function_id": "write", "arguments": {"path": "/tmp/x"} }, + "approval_required": ["write"], + "session_id": "s1", + } + }); + let call = extract_call(&envelope).expect("decoded"); + assert_eq!(call.session_id, "s1"); + assert_eq!(call.function_call_id, "tc-1"); + assert_eq!(call.function_id, "write"); + assert_eq!(call.event_id, "evt-1"); + assert_eq!(call.reply_stream, "rs-1"); + assert!(call.approval_required.iter().any(|s| s == "write")); + } + + + #[test] + fn extract_call_accepts_legacy_tool_call_envelope_with_name() { + let envelope = json!({ + "event_id": "evt-1", + "reply_stream": "rs-1", + "payload": { + "tool_call": { "id": "tc-1", "name": "write", "arguments": {} }, + "approval_required": ["write"], + "session_id": "s1", + } + }); + let call = extract_call(&envelope).expect("decoded"); + assert_eq!(call.function_call_id, "tc-1"); + assert_eq!(call.function_id, "write"); + } + + + #[test] + fn requires_approval_only_for_listed_functions() { + let call = IncomingCall { + session_id: "s1".into(), + function_call_id: "tc-1".into(), + function_id: "ls".into(), + args: json!({}), + approval_required: vec!["write".into()], + event_id: "e".into(), + reply_stream: "r".into(), + }; + assert!(!call.requires_approval()); + + let call2 = IncomingCall { + function_id: "write".into(), + ..call + }; + assert!(call2.requires_approval()); + } + + + #[test] + fn block_reply_for_decision_allow_does_not_block() { + let reply = block_reply_for(&Decision::Allow); + assert_eq!(reply["block"], false); + } + + + #[test] + fn block_reply_for_deny_emits_structured_denial() { + let 
reply = block_reply_for(&Decision::Deny(Denial::UserRejected)); + assert_eq!(reply["block"], true); + assert_eq!(reply["denial"]["kind"], "user_rejected"); + assert!(reply.as_object().unwrap().get("reason").is_none()); + } + + + #[test] + fn block_reply_for_policy_deny_carries_classifier_detail() { + let reply = block_reply_for(&Decision::Deny(Denial::Policy { + classifier_reason: "command matches denylist".into(), + classifier_fn: "shell::classify_argv".into(), + })); + assert_eq!(reply["block"], true); + assert_eq!(reply["denial"]["kind"], "policy"); + assert_eq!( + reply["denial"]["detail"]["classifier_reason"], + "command matches denylist" + ); + assert_eq!( + reply["denial"]["detail"]["classifier_fn"], + "shell::classify_argv" + ); + } + + + #[test] + fn block_reply_for_user_corrected_carries_feedback() { + let reply = block_reply_for(&Decision::Deny(Denial::UserCorrected { + feedback: "use git diff instead".into(), + })); + assert_eq!(reply["denial"]["kind"], "user_corrected"); + assert_eq!( + reply["denial"]["detail"]["feedback"], + "use git diff instead" + ); + } + + + #[test] + fn extract_call_returns_none_when_function_call_absent() { + let envelope = json!({ + "event_id": "evt-1", + "reply_stream": "rs-1", + "payload": { "session_id": "s1", "approval_required": ["write"] } + }); + assert!(extract_call(&envelope).is_none()); + } + + + #[test] + fn extract_call_returns_none_when_session_id_absent() { + let envelope = json!({ + "event_id": "evt-1", + "reply_stream": "rs-1", + "payload": { + "tool_call": { "id": "tc-1", "name": "write", "arguments": {} } + } + }); + assert!(extract_call(&envelope).is_none()); + } + + + #[test] + fn block_reply_for_allow_omits_denial_and_reason() { + let reply = block_reply_for(&Decision::Allow); + assert_eq!(reply["block"], false); + assert!( + reply.get("reason").is_none(), + "Allow must not include reason: {reply}" + ); + assert!( + reply.get("denial").is_none(), + "Allow must not include denial: {reply}" + ); + } From 
0c6f95f6dd84821206cc7631837a2ff09971e2db Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 00:02:28 -0300 Subject: [PATCH 15/30] feat(approval-gate): new Record schema with Pending|InFlight|Done lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pending → InFlight → Done(Outcome). InFlight is the intermediate persist that closes the dup-exec race within a worker process — a second approval::resolve arriving during the invoke await sees the row is non-Pending and bails. Outcome is a tagged enum (Executed/Failed/Denied/TimedOut). resolved_at is stamped on the first non-Pending transition for deterministic multi-row consume ordering. lifecycle.rs is deleted; its only surviving helper (flipped_to_timed_out_if_expired) is now a Record method. --- approval-gate/src/lib.rs | 7 +- approval-gate/src/lifecycle.rs | 140 ------------ approval-gate/src/record.rs | 405 ++++++++++++++++----------------- 3 files changed, 194 insertions(+), 358 deletions(-) delete mode 100644 approval-gate/src/lifecycle.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index e181db08..6fa4cf43 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -5,7 +5,6 @@ pub mod config; pub mod delivery; pub mod intercept; -pub mod lifecycle; pub mod manifest; pub mod record; pub mod register; @@ -21,11 +20,7 @@ pub use delivery::{ handle_list_undelivered, handle_sweep_session, LIST_UNDELIVERED_DEFAULT_LIMIT, }; pub use intercept::handle_intercept; -pub use lifecycle::{ - build_pending_record, collect_timed_out_for_sweep, is_terminal_status, maybe_flip_timed_out, - transition_record, transition_record_with_now, -}; -pub use record::{Next, Record, Status}; +pub use record::{Outcome, Record, Status}; pub use register::{ register, Refs, FN_ACK_DELIVERED, FN_CONSUME_UNDELIVERED, FN_FLUSH_DELIVERED, FN_LIST_PENDING, FN_LIST_UNDELIVERED, FN_LOOKUP_RECORD, FN_RESOLVE, FN_SWEEP_SESSION, STATE_SCOPE, diff --git 
a/approval-gate/src/lifecycle.rs b/approval-gate/src/lifecycle.rs deleted file mode 100644 index 1b7c684c..00000000 --- a/approval-gate/src/lifecycle.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Persisted-record lifecycle helpers. -//! -//! Pure functions that construct and transition the `Value`-blob record -//! schema as it lives in the iii state bus. No I/O, no async — the only -//! impurity is reading the system clock via [`transition_record`], whose -//! testable variant [`transition_record_with_now`] takes `now_ms` -//! directly. (Operators adopting the typed schema can read the same -//! shape via [`crate::record::Record`] / [`crate::record::Record::from_value`].) -//! -//! The wire keys (`status`, `function_call_id`, `expires_at`, -//! `resolved_at`, `result`, `error`, `denial`, `delivered_in_turn_id`) -//! are stable contract; renaming requires a state-store migration. The -//! `denial` field is documented at [`crate::wire::Denial`]. - -use serde_json::{json, Value}; - -use crate::wire::{pending_key, Denial}; - -/// True if `status` is one of the terminal states a stitched system message -/// should be built from. `pending` and `approved` are intermediate. -pub fn is_terminal_status(status: &str) -> bool { - matches!(status, "executed" | "failed" | "denied" | "timed_out") -} - -/// Build a fresh pending record. `session_id` is unset here — -/// `handle_intercept` stamps it before persisting. `expires_at` is -/// `now_ms + timeout_ms`, saturating on overflow so a buggy caller -/// can't underflow the deadline. -pub fn build_pending_record( - function_call_id: &str, - function_id: &str, - args: &Value, - now_ms: u64, - timeout_ms: u64, -) -> Value { - json!({ - "function_call_id": function_call_id, - "function_id": function_id, - "args": args, - "status": "pending", - "expires_at": now_ms.saturating_add(timeout_ms), - }) -} - -/// Build a new record by transitioning a pending base record to a terminal -/// status. 
All terminal fields (`result`, `error`, `denial`) are optional; -/// only the ones provided are attached. Existing fields on the base -/// (including `delivered_in_turn_id` and `resolved_at` if present) are -/// preserved. The first transition into a terminal status stamps -/// `resolved_at`. -pub fn transition_record( - base: &Value, - new_status: &str, - result: Option<Value>, - error: Option<String>, - denial: Option<Denial>, -) -> Value { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - transition_record_with_now(base, new_status, result, error, denial, now_ms) -} - -/// Testable variant of [`transition_record`] that takes `now_ms` directly. -pub fn transition_record_with_now( - base: &Value, - new_status: &str, - result: Option<Value>, - error: Option<String>, - denial: Option<Denial>, - now_ms: u64, -) -> Value { - let mut rec = base.clone(); - if let Some(obj) = rec.as_object_mut() { - obj.insert("status".into(), Value::String(new_status.to_string())); - if let Some(r) = result { - obj.insert("result".into(), r); - } - if let Some(e) = error { - obj.insert("error".into(), Value::String(e)); - } - if let Some(d) = denial { - obj.insert( - "denial".into(), - serde_json::to_value(&d).expect("Denial is always serializable"), - ); - } - if is_terminal_status(new_status) && !obj.contains_key("resolved_at") { - obj.insert("resolved_at".into(), Value::Number(now_ms.into())); - } - } - rec -} - -/// For a bag of pending records, return the subset that have expired at -/// `now_ms` along with the metadata needed to commit the flip and notify the -/// owning session. Records without a stamped `session_id` (legacy rows -/// written before that field existed) are skipped — they'll still be picked -/// up lazily by `handle_list_undelivered` on the next read.
-pub fn collect_timed_out_for_sweep( - records: &[Value], - now_ms: u64, -) -> Vec<(String, Value, String, String)> { - records - .iter() - .filter_map(|rec| { - let flipped = maybe_flip_timed_out(rec, now_ms)?; - let session_id = flipped - .get("session_id") - .and_then(Value::as_str)? - .to_string(); - let function_call_id = flipped - .get("function_call_id") - .and_then(Value::as_str)? - .to_string(); - if session_id.is_empty() || function_call_id.is_empty() { - return None; - } - let key = pending_key(&session_id, &function_call_id); - Some((key, flipped, session_id, function_call_id)) - }) - .collect() -} - -/// Return Some(timed_out_record) if `rec` is pending and `now_ms` is past -/// `expires_at`; otherwise None. Pure function — does not write state. -pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option<Value> { - if rec.get("status").and_then(Value::as_str) != Some("pending") { - return None; - } - let exp = rec.get("expires_at").and_then(Value::as_u64)?; - if now_ms < exp { - return None; - } - // Timeout flip carries no Denial: the `timed_out` status itself is the - // explanation. Downstream renderers (turn-orchestrator stitching, UIs) - // branch on the status, not on a redundant reason string. - Some(transition_record(rec, "timed_out", None, None, None)) -} diff --git a/approval-gate/src/record.rs b/approval-gate/src/record.rs index ed8d0400..3ea2bdb8 100644 --- a/approval-gate/src/record.rs +++ b/approval-gate/src/record.rs @@ -1,106 +1,74 @@ -//! Typed approval record schema. +//! Approval-gate record schema. //! -//! Replaces the ad-hoc `serde_json::Value` blobs that handlers used to -//! pass around. The Rust shape mirrors the persisted JSON exactly via -//! serde — wire compatibility is the contract, this is just the in-process -//! representation. +//! `Pending → InFlight → Done(Outcome)`. The intermediate InFlight write +//! between operator-approve and the executor `iii.trigger` is what closes +//!
the duplicate-execution race — a second `approval::resolve` arriving +//! during the invoke await sees a non-Pending row and bails. //! -//! ## Lifecycle -//! -//! ```text -//! intercept → Pending ──user allow──> Approved ──invoke──> Executed -//! │ └invoke-err──> Failed -//! ├──user deny──> Denied -//! └──expires──> TimedOut -//! ``` -//! -//! Outcome data (result / error / denial) is required on the terminal -//! status it belongs to and meaningless elsewhere. [`Next`] enforces this -//! at the type level so transitions can't be miscalled. +//! `lifecycle.rs` is gone; its only surviving helper +//! (`flipped_to_timed_out_if_expired`) lives here as a `Record` method +//! because it operates on a `Record`. use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::Denial; +use crate::wire::Denial; -/// Lifecycle status of an approval record. Wire format is snake_case so -/// it stays human-readable in iii-state dumps and audit logs. +/// Lifecycle status. Wire format is snake_case so iii-state dumps stay +/// human-readable. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum Status { + /// Waiting for the operator's decision (no outcome attached). Pending, - Approved, - Executed, - Failed, - Denied, - TimedOut, + /// Operator approved; underlying `iii.trigger` is in flight. Persisted + /// to close the dup-exec race across concurrent `approval::resolve` + /// calls within a worker process. + InFlight, + /// Terminal. `outcome` is `Some`. + Done, } -impl Status { - /// `true` for statuses that represent a final outcome — anything - /// stitchable into the LLM's next turn. `Pending` and `Approved` are - /// intermediate; the rest are terminal. - pub fn is_terminal(self) -> bool { - matches!( - self, - Status::Executed | Status::Failed | Status::Denied | Status::TimedOut - ) - } +/// Outcome data attached to terminal records. Tagged enum on the wire +/// (`{ "kind": "...", "detail": { ... 
} }`). +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "kind", content = "detail", rename_all = "snake_case")] +pub enum Outcome { + Executed { result: Value }, + Failed { error: String }, + Denied { denial: Denial }, + TimedOut, } -/// Persisted approval record. Wire-compatible with the historical -/// JSON shape — every field uses the same key/type the previous -/// `serde_json::Value`-based code emitted, so existing iii-state -/// rows deserialize cleanly. -/// -/// Optional fields are scoped to particular statuses (e.g. `result` -/// only when `status == Executed`); the type itself does not enforce -/// that pairing — [`Next`] does, at the transition boundary. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +/// Persisted approval record. +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Record { pub function_call_id: String, pub function_id: String, pub args: Value, - pub status: Status, + pub session_id: String, pub expires_at: u64, - - /// Stamped by `handle_intercept` after the pending record is built so - /// the timeout sweeper can address the right session stream. + pub status: Status, + /// `Some` iff `status == Done`. Constructors enforce this invariant. #[serde(default, skip_serializing_if = "Option::is_none")] - pub session_id: Option<String>, - - /// Unix ms of the first transition into a terminal status. + pub outcome: Option<Outcome>, + /// Unix ms stamped on the first non-Pending transition. `handle_consume` + /// sorts entries by this field so multi-row consumes (cascade case) + /// produce deterministic LLM message order. Provider-minted + /// `function_call_id` (Anthropic `toolu_*`, OpenAI `call_*`) is not + /// lex-monotonic and can't substitute. #[serde(default, skip_serializing_if = "Option::is_none")] pub resolved_at: Option<u64>, - - /// Function output. Present iff `status == Executed`. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub result: Option<Value>, - - /// Function error string.
Present iff `status == Failed`. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub error: Option<String>, - - /// Structured deny payload. Present iff `status == Denied`. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub denial: Option<Denial>, - - /// Set when `approval::ack_delivered` stamps the record with the turn - /// id that surfaced it to the LLM. Records without this stamp surface - /// again on subsequent `approval::list_undelivered` calls. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub delivered_in_turn_id: Option<String>, } impl Record { - /// Construct a fresh pending record. `session_id` is unset here — - /// `handle_intercept` stamps it before persisting. `expires_at` is - /// `now_ms + timeout_ms`, saturating on overflow so a malicious - /// or buggy caller can't underflow the deadline. - pub fn new_pending( + /// Fresh Pending row. `expires_at = now_ms + timeout_ms`, saturating on + /// overflow so a buggy caller can't underflow the deadline. + pub fn pending( function_call_id: String, function_id: String, args: Value, + session_id: String, now_ms: u64, timeout_ms: u64, ) -> Self { @@ -108,58 +76,70 @@ impl Record { function_call_id, function_id, args, - status: Status::Pending, + session_id, expires_at: now_ms.saturating_add(timeout_ms), - session_id: None, + status: Status::Pending, + outcome: None, resolved_at: None, - result: None, - error: None, - denial: None, - delivered_in_turn_id: None, } } - /// Serialize to the wire JSON shape. Infallible — `serde_json::to_value` - /// on a struct with only serializable fields cannot fail at runtime. - pub fn to_value(&self) -> Value { - serde_json::to_value(self).expect("Record is always serializable") + /// Pending → InFlight. Stamps `resolved_at` (the "first non-Pending" + /// marker for ordering). Caller is responsible for ensuring the row + /// was actually Pending before calling; this is enforced at the + /// callsite (`handle_resolve`) via a Status check.
+ pub fn in_flight(self, now_ms: u64) -> Self { + Self { + status: Status::InFlight, + resolved_at: Some(self.resolved_at.unwrap_or(now_ms)), + ..self + } } - /// Deserialize from the wire JSON shape. Returns `None` if the value - /// doesn't match the schema (missing required fields, bad status enum, - /// etc.) — handlers treat that as "skip this record" rather than - /// crashing on corrupt state. - pub fn from_value(v: Value) -> Option<Self> { - serde_json::from_value(v).ok() + /// InFlight → Done. Preserves `resolved_at` from the InFlight write + /// (so audit timestamps reflect when the row left Pending, not when + /// the invoke finished). + pub fn done(self, outcome: Outcome) -> Self { + Self { + status: Status::Done, + outcome: Some(outcome), + ..self + } } -} -/// What `transition_record` should change. Each variant pairs the target -/// [`Status`] with whatever outcome data that status carries. The type -/// system makes invalid combinations unrepresentable: you can't ask for -/// `Executed` without providing a result, or for `Denied` without a -/// `Denial`. `Approved` is an intermediate status carrying no outcome — -/// it exists so the bus can observe the post-allow / pre-invoke state. -#[derive(Debug, Clone)] -pub enum Next { - Approved, - Executed { result: Value }, - Failed { error: String }, - Denied { denial: Denial }, - TimedOut, -} + /// Pending → Done directly (deny path, timeout flip — paths that + /// don't run an invoke). Stamps `resolved_at` with `now_ms`. + pub fn done_at(self, now_ms: u64, outcome: Outcome) -> Self { + Self { + status: Status::Done, + outcome: Some(outcome), + resolved_at: Some(self.resolved_at.unwrap_or(now_ms)), + ..self + } + } -impl Next { - /// The target status this transition moves the record to. - pub fn status(&self) -> Status { - match self { - Next::Approved => Status::Approved, - Next::Executed { .. } => Status::Executed, - Next::Failed { .. } => Status::Failed, - Next::Denied { ..
} => Status::Denied, - Next::TimedOut => Status::TimedOut, + /// Lazy timeout flip. Returns `Some(flipped)` iff the row is Pending + /// AND `now_ms >= expires_at`. InFlight rows are owned by an + /// in-progress invoke and are never touched here. Done rows are + /// already terminal. + pub fn flipped_to_timed_out_if_expired(&self, now_ms: u64) -> Option<Self> { + if self.status == Status::Pending && now_ms >= self.expires_at { + Some(self.clone().done_at(now_ms, Outcome::TimedOut)) + } else { + None } } + + /// Wire JSON shape (infallible — only serializable fields). + pub fn to_value(&self) -> Value { + serde_json::to_value(self).expect("Record is always serializable") + } + + /// Parse from wire JSON. `None` means the row doesn't match the + /// schema; callers skip such rows. + pub fn from_value(v: Value) -> Option<Self> { + serde_json::from_value(v).ok() + } } #[cfg(test)] @@ -167,127 +147,128 @@ mod tests { use super::*; use serde_json::json; + fn pending_record() -> Record { + Record::pending( + "tc-1".into(), + "shell::exec".into(), + json!({"command": "ls"}), + "sess_a".into(), + 1_000, + 60_000, + ) + } + #[test] - fn status_is_terminal_matches_lifecycle() { - assert!(!Status::Pending.is_terminal()); - assert!(!Status::Approved.is_terminal()); - assert!(Status::Executed.is_terminal()); - assert!(Status::Failed.is_terminal()); - assert!(Status::Denied.is_terminal()); - assert!(Status::TimedOut.is_terminal()); + fn pending_has_no_outcome_and_no_resolved_at() { + let r = pending_record(); + assert_eq!(r.status, Status::Pending); + assert!(r.outcome.is_none()); + assert!(r.resolved_at.is_none()); + assert_eq!(r.expires_at, 61_000); } #[test] - fn status_serializes_as_snake_case_string() { - assert_eq!(serde_json::to_value(Status::Pending).unwrap(), json!("pending")); - assert_eq!(serde_json::to_value(Status::Approved).unwrap(), json!("approved")); - assert_eq!(serde_json::to_value(Status::Executed).unwrap(), json!("executed")); -
assert_eq!(serde_json::to_value(Status::Failed).unwrap(), json!("failed")); - assert_eq!(serde_json::to_value(Status::Denied).unwrap(), json!("denied")); - assert_eq!(serde_json::to_value(Status::TimedOut).unwrap(), json!("timed_out")); + fn pending_expires_at_saturates_on_overflow() { + let r = Record::pending( + "tc-1".into(), "f".into(), json!({}), "s".into(), u64::MAX - 5, 100); + assert_eq!(r.expires_at, u64::MAX); } #[test] - fn status_round_trips_via_json() { - for s in [ - Status::Pending, - Status::Approved, - Status::Executed, - Status::Failed, - Status::Denied, - Status::TimedOut, + fn in_flight_preserves_fields_and_clears_outcome_state() { + let p = pending_record(); + let i = p.clone().in_flight(2_000); + assert_eq!(i.status, Status::InFlight); + assert_eq!(i.function_call_id, p.function_call_id); + assert_eq!(i.session_id, p.session_id); + assert_eq!(i.args, p.args); + assert!(i.outcome.is_none()); + assert_eq!(i.resolved_at, Some(2_000), "InFlight stamps resolved_at"); + } + + #[test] + fn done_stamps_outcome_and_preserves_in_flight_resolved_at() { + let i = pending_record().in_flight(2_000); + let d = i.clone().done(Outcome::Executed { result: json!({"ok": true}) }); + assert_eq!(d.status, Status::Done); + assert!(matches!(d.outcome, Some(Outcome::Executed { .. }))); + // resolved_at was set at InFlight time and must NOT be re-stamped on Done. + assert_eq!(d.resolved_at, Some(2_000)); + } + + #[test] + fn done_directly_from_pending_stamps_resolved_at() { + // Deny path skips InFlight; we still need a resolved_at for ordering. 
+ let p = pending_record(); + let d = p.done_at(3_000, Outcome::Denied { denial: Denial::UserRejected }); + assert_eq!(d.status, Status::Done); + assert_eq!(d.resolved_at, Some(3_000)); + } + + #[test] + fn outcome_round_trip_via_json() { + for o in [ + Outcome::Executed { result: json!({"x": 1}) }, + Outcome::Failed { error: "boom".into() }, + Outcome::Denied { denial: Denial::UserRejected }, + Outcome::TimedOut, ] { - let v = serde_json::to_value(s).unwrap(); - let back: Status = serde_json::from_value(v).unwrap(); - assert_eq!(back, s); + let v = serde_json::to_value(&o).unwrap(); + let back: Outcome = serde_json::from_value(v).unwrap(); + // Exhaustive equality is verbose; just round-trip the discriminant. + assert_eq!(std::mem::discriminant(&o), std::mem::discriminant(&back)); } } #[test] - fn record_pending_round_trips() { - let rec = Record::new_pending( - "c1".into(), - "shell::exec".into(), - json!({"command": "ls"}), - 1_000, - 60_000, - ); - let v = serde_json::to_value(&rec).unwrap(); - assert_eq!(v["function_call_id"], "c1"); - assert_eq!(v["function_id"], "shell::exec"); - assert_eq!(v["status"], "pending"); - assert_eq!(v["expires_at"], 61_000); - // Optional fields are omitted when None. 
- assert!(v.as_object().unwrap().get("session_id").is_none()); - assert!(v.as_object().unwrap().get("denial").is_none()); - let back: Record = serde_json::from_value(v).unwrap(); - assert_eq!(back, rec); + fn record_round_trip_pending() { + let r = pending_record(); + let v = r.to_value(); + let back = Record::from_value(v).expect("deserialize"); + assert_eq!(back.status, Status::Pending); + assert_eq!(back.function_call_id, "tc-1"); + } + + #[test] + fn record_round_trip_done_carries_outcome_and_resolved_at() { + let r = pending_record() + .in_flight(2_000) + .done(Outcome::Executed { result: json!({"out": "hi"}) }); + let v = r.to_value(); + let back = Record::from_value(v).expect("deserialize"); + assert_eq!(back.status, Status::Done); + assert_eq!(back.resolved_at, Some(2_000)); + assert!(matches!(back.outcome, Some(Outcome::Executed { .. }))); } #[test] - fn record_with_optional_fields_round_trips() { - let rec = Record { - function_call_id: "c1".into(), - function_id: "shell::exec".into(), - args: json!({}), - status: Status::Executed, - expires_at: 60_000, - session_id: Some("s1".into()), - resolved_at: Some(5_000), - result: Some(json!({"ok": true})), - error: None, - denial: None, - delivered_in_turn_id: Some("turn-X".into()), - }; - let v = serde_json::to_value(&rec).unwrap(); - let back: Record = serde_json::from_value(v).unwrap(); - assert_eq!(back, rec); + fn flip_returns_none_when_not_expired() { + let r = pending_record(); + assert!(r.flipped_to_timed_out_if_expired(60_000).is_none()); } #[test] - fn record_pending_expires_at_saturates_on_overflow() { - let rec = Record::new_pending( - "c1".into(), - "f".into(), - json!({}), - u64::MAX - 5, - 100, - ); - assert_eq!(rec.expires_at, u64::MAX); + fn flip_returns_done_timed_out_for_expired_pending() { + let r = pending_record(); + let flipped = r.flipped_to_timed_out_if_expired(70_000) + .expect("expired pending should flip"); + assert_eq!(flipped.status, Status::Done); + 
assert!(matches!(flipped.outcome, Some(Outcome::TimedOut))); + assert_eq!(flipped.resolved_at, Some(70_000)); } #[test] - fn record_deserializes_from_wire_with_unknown_extra_fields() { - // Forward-compat: unknown fields are silently ignored so a worker - // can read a record written by a newer worker version without - // crashing on schema additions it doesn't know about yet. - let v = json!({ - "function_call_id": "c1", - "function_id": "f", - "args": {}, - "status": "pending", - "expires_at": 1000, - "future_field": "some new thing", - }); - let rec: Record = serde_json::from_value(v).unwrap(); - assert_eq!(rec.status, Status::Pending); + fn flip_does_not_touch_in_flight_rows() { + let r = pending_record().in_flight(2_000); + assert!(r.flipped_to_timed_out_if_expired(70_000).is_none(), + "InFlight rows are owned by an in-progress invoke; lazy flip must not steal them"); } #[test] - fn next_status_pairing_is_correct() { - assert_eq!(Next::Approved.status(), Status::Approved); - assert_eq!( - Next::Executed { result: json!({"ok": true}) }.status(), - Status::Executed - ); - assert_eq!( - Next::Failed { error: "EACCES".into() }.status(), - Status::Failed - ); - assert_eq!( - Next::Denied { denial: Denial::UserRejected }.status(), - Status::Denied - ); - assert_eq!(Next::TimedOut.status(), Status::TimedOut); + fn flip_does_not_touch_already_done_rows() { + let r = pending_record() + .in_flight(2_000) + .done(Outcome::Executed { result: json!({}) }); + assert!(r.flipped_to_timed_out_if_expired(70_000).is_none()); } } From 4f305f859800955a4c4a8c1a1f6bbd6e5e02e52f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 06:53:31 -0300 Subject: [PATCH 16/30] fix(approval-gate): keep lifecycle.rs as transitional compat shim Task 1 of the simplification plan deletes lifecycle.rs entirely, but the plan was a bit optimistic: dependent modules (intercept, resolve, delivery, sweeper) still import its helpers (build_pending_record, transition_record, 
maybe_flip_timed_out, is_terminal_status, collect_timed_out_for_sweep) and the crate fails to compile without them. Cargo can't run record.rs's unit tests if the library itself won't link. Restore lifecycle.rs as a transitional shim. Tasks 5/6/8/11 will progressively migrate each callsite to the new Record API; whichever of those tasks lands last deletes lifecycle.rs for real. --- approval-gate/src/lib.rs | 1 + approval-gate/src/lifecycle.rs | 140 +++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 approval-gate/src/lifecycle.rs diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 6fa4cf43..7091217e 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; pub mod delivery; pub mod intercept; +pub mod lifecycle; // transitional compat shim — deleted in T11 after T5/T6/T8 migrate callsites pub mod manifest; pub mod record; pub mod register; diff --git a/approval-gate/src/lifecycle.rs b/approval-gate/src/lifecycle.rs new file mode 100644 index 00000000..1b7c684c --- /dev/null +++ b/approval-gate/src/lifecycle.rs @@ -0,0 +1,140 @@ +//! Persisted-record lifecycle helpers. +//! +//! Pure functions that construct and transition the `Value`-blob record +//! schema as it lives in the iii state bus. No I/O, no async — the only +//! impurity is reading the system clock via [`transition_record`], whose +//! testable variant [`transition_record_with_now`] takes `now_ms` +//! directly. (Operators adopting the typed schema can read the same +//! shape via [`crate::record::Record`] / [`crate::record::Record::from_value`].) +//! +//! The wire keys (`status`, `function_call_id`, `expires_at`, +//! `resolved_at`, `result`, `error`, `denial`, `delivered_in_turn_id`) +//! are stable contract; renaming requires a state-store migration. The +//! `denial` field is documented at [`crate::wire::Denial`]. 
+ +use serde_json::{json, Value}; + +use crate::wire::{pending_key, Denial}; + +/// True if `status` is one of the terminal states a stitched system message +/// should be built from. `pending` and `approved` are intermediate. +pub fn is_terminal_status(status: &str) -> bool { + matches!(status, "executed" | "failed" | "denied" | "timed_out") +} + +/// Build a fresh pending record. `session_id` is unset here — +/// `handle_intercept` stamps it before persisting. `expires_at` is +/// `now_ms + timeout_ms`, saturating on overflow so a buggy caller +/// can't underflow the deadline. +pub fn build_pending_record( + function_call_id: &str, + function_id: &str, + args: &Value, + now_ms: u64, + timeout_ms: u64, +) -> Value { + json!({ + "function_call_id": function_call_id, + "function_id": function_id, + "args": args, + "status": "pending", + "expires_at": now_ms.saturating_add(timeout_ms), + }) +} + +/// Build a new record by transitioning a pending base record to a terminal +/// status. All terminal fields (`result`, `error`, `denial`) are optional; +/// only the ones provided are attached. Existing fields on the base +/// (including `delivered_in_turn_id` and `resolved_at` if present) are +/// preserved. The first transition into a terminal status stamps +/// `resolved_at`. +pub fn transition_record( + base: &Value, + new_status: &str, + result: Option<Value>, + error: Option<String>, + denial: Option<Denial>, +) -> Value { + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + transition_record_with_now(base, new_status, result, error, denial, now_ms) +} + +/// Testable variant of [`transition_record`] that takes `now_ms` directly.
+pub fn transition_record_with_now( + base: &Value, + new_status: &str, + result: Option<Value>, + error: Option<String>, + denial: Option<Denial>, + now_ms: u64, +) -> Value { + let mut rec = base.clone(); + if let Some(obj) = rec.as_object_mut() { + obj.insert("status".into(), Value::String(new_status.to_string())); + if let Some(r) = result { + obj.insert("result".into(), r); + } + if let Some(e) = error { + obj.insert("error".into(), Value::String(e)); + } + if let Some(d) = denial { + obj.insert( + "denial".into(), + serde_json::to_value(&d).expect("Denial is always serializable"), + ); + } + if is_terminal_status(new_status) && !obj.contains_key("resolved_at") { + obj.insert("resolved_at".into(), Value::Number(now_ms.into())); + } + } + rec +} + +/// For a bag of pending records, return the subset that have expired at +/// `now_ms` along with the metadata needed to commit the flip and notify the +/// owning session. Records without a stamped `session_id` (legacy rows +/// written before that field existed) are skipped — they'll still be picked +/// up lazily by `handle_list_undelivered` on the next read. +pub fn collect_timed_out_for_sweep( + records: &[Value], + now_ms: u64, +) -> Vec<(String, Value, String, String)> { + records + .iter() + .filter_map(|rec| { + let flipped = maybe_flip_timed_out(rec, now_ms)?; + let session_id = flipped + .get("session_id") + .and_then(Value::as_str)? + .to_string(); + let function_call_id = flipped + .get("function_call_id") + .and_then(Value::as_str)? + .to_string(); + if session_id.is_empty() || function_call_id.is_empty() { + return None; + } + let key = pending_key(&session_id, &function_call_id); + Some((key, flipped, session_id, function_call_id)) + }) + .collect() +} + +/// Return Some(timed_out_record) if `rec` is pending and `now_ms` is past +/// `expires_at`; otherwise None. Pure function — does not write state.
+pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option<Value> { + if rec.get("status").and_then(Value::as_str) != Some("pending") { + return None; + } + let exp = rec.get("expires_at").and_then(Value::as_u64)?; + if now_ms < exp { + return None; + } + // Timeout flip carries no Denial: the `timed_out` status itself is the + // explanation. Downstream renderers (turn-orchestrator stitching, UIs) + // branch on the status, not on a redundant reason string. + Some(transition_record(rec, "timed_out", None, None, None)) +} From 84b886b5057a582920b24fc0cb7043099909446f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 06:59:59 -0300 Subject: [PATCH 17/30] feat(approval-gate): Denial::Policy carries rule_permission + rule_pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The classifier surface is being deleted in favor of the layered rules engine; rename the Policy variant's detail fields to match the actual data they'll carry once T5 lands. For now, classifier callsites (intercept.rs, register.rs) map their old reason/fn locals into the new field names — transient mapping, deleted in T5. Test callsites in tests/wire.rs and tests/lifecycle.rs updated to use representative rule_permission/rule_pattern values. Note: IncomingCall.approval_required + requires_approval() field/method are NOT removed in this commit (the plan's T2 calls for it but intercept.rs's decide_intercept_action still depends on them). They disappear in T5 when intercept.rs is rewritten end-to-end.
--- approval-gate/src/intercept.rs | 16 ++++++++++------ approval-gate/src/register.rs | 6 ++---- approval-gate/src/wire.rs | 16 ++++++++++------ approval-gate/tests/lifecycle.rs | 6 +++--- approval-gate/tests/wire.rs | 12 ++++++------ 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/approval-gate/src/intercept.rs b/approval-gate/src/intercept.rs index 7cd58628..8ee4906d 100644 --- a/approval-gate/src/intercept.rs +++ b/approval-gate/src/intercept.rs @@ -123,9 +123,12 @@ pub(crate) fn interpret_classifier_reply( .and_then(Value::as_str) .unwrap_or("denied") .to_string(); + // Transient mapping: classifier reason/fn stored in the renamed + // Denial::Policy fields. The whole classifier surface is deleted + // in T5; for now this just keeps the build green. Ok(ClassifierDecision::Deny(Denial::Policy { - classifier_reason, - classifier_fn: classifier_fn.to_string(), + rule_permission: classifier_fn.to_string(), + rule_pattern: classifier_reason, })) } "ask" => Ok(ClassifierDecision::Ask), @@ -252,11 +255,12 @@ mod tests { "shell::classify_argv", ) { Ok(ClassifierDecision::Deny(Denial::Policy { - classifier_reason, - classifier_fn, + rule_permission, + rule_pattern, })) => { - assert_eq!(classifier_reason, "nope"); - assert_eq!(classifier_fn, "shell::classify_argv"); + // Per the transient mapping in interpret_classifier_reply. 
+ assert_eq!(rule_pattern, "nope"); + assert_eq!(rule_permission, "shell::classify_argv"); } o => panic!("expected Policy denial {:?}", o), } diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs index a6d9617c..e26b9d3a 100644 --- a/approval-gate/src/register.rs +++ b/approval-gate/src/register.rs @@ -370,10 +370,8 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { rule_pattern, } => { let denial = Denial::Policy { - classifier_reason: format!( - "rule {rule_permission} {rule_pattern} denies" - ), - classifier_fn: "approval-gate::rules".to_string(), + rule_permission, + rule_pattern, }; return Ok::<_, IIIError>(json!({ "block": true, diff --git a/approval-gate/src/wire.rs b/approval-gate/src/wire.rs index 7f540e5d..2aaa98b0 100644 --- a/approval-gate/src/wire.rs +++ b/approval-gate/src/wire.rs @@ -22,21 +22,25 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; /// Structured deny payload carried on wire replies, persisted records, and -/// `approval_resolved` stream events. Replaces the legacy free-form -/// `decision_reason` / `reason` strings so consumers (turn-orchestrator -/// stitching, UIs, the LLM) can branch on `kind` instead of parsing prose. +/// `approval_resolved` stream events. Consumers (turn-orchestrator +/// stitching, UIs, the LLM) branch on `kind` instead of parsing prose. /// /// Wire shape (serde tag=kind, content=detail, snake_case): -/// `{ "kind": "policy", "detail": { "classifier_reason": "...", "classifier_fn": "..." } }` +/// `{ "kind": "policy", "detail": { "rule_permission": "...", "rule_pattern": "..." } }` /// `{ "kind": "user_rejected", "detail": null }` /// `{ "kind": "user_corrected", "detail": { "feedback": "..." } }` /// `{ "kind": "state_error", "detail": { "phase": "...", "error": "..." } }` +/// +/// `Policy` names the matching rule from the layered ruleset +/// (`approval-gate/src/rules.rs`). 
The old `classifier_reason` / +/// `classifier_fn` shape went away when the classifier surface was +/// deleted in favor of pure rules-based decisions. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind", content = "detail", rename_all = "snake_case")] pub enum Denial { Policy { - classifier_reason: String, - classifier_fn: String, + rule_permission: String, + rule_pattern: String, }, UserRejected, UserCorrected { diff --git a/approval-gate/tests/lifecycle.rs b/approval-gate/tests/lifecycle.rs index 0dcaff1a..24036d24 100644 --- a/approval-gate/tests/lifecycle.rs +++ b/approval-gate/tests/lifecycle.rs @@ -159,13 +159,13 @@ use std::sync::Mutex; None, None, Some(Denial::Policy { - classifier_reason: "not authorized".into(), - classifier_fn: "shell::classify_argv".into(), + rule_permission: "shell::fs::write".into(), + rule_pattern: "*".into(), }), ); assert_eq!(rec["status"], "denied"); assert_eq!(rec["denial"]["kind"], "policy"); - assert_eq!(rec["denial"]["detail"]["classifier_reason"], "not authorized"); + assert_eq!(rec["denial"]["detail"]["rule_permission"], "shell::fs::write"); assert!( rec.get("decision_reason").is_none(), "legacy decision_reason must not be written: {rec}" diff --git a/approval-gate/tests/wire.rs b/approval-gate/tests/wire.rs index 4cee9e04..dc1ab59d 100644 --- a/approval-gate/tests/wire.rs +++ b/approval-gate/tests/wire.rs @@ -88,18 +88,18 @@ use std::sync::Mutex; #[test] fn block_reply_for_policy_deny_carries_classifier_detail() { let reply = block_reply_for(&Decision::Deny(Denial::Policy { - classifier_reason: "command matches denylist".into(), - classifier_fn: "shell::classify_argv".into(), + rule_permission: "shell::exec".into(), + rule_pattern: "rm -rf*".into(), })); assert_eq!(reply["block"], true); assert_eq!(reply["denial"]["kind"], "policy"); assert_eq!( - reply["denial"]["detail"]["classifier_reason"], - "command matches denylist" + reply["denial"]["detail"]["rule_permission"], + "shell::exec" ); 
assert_eq!( - reply["denial"]["detail"]["classifier_fn"], - "shell::classify_argv" + reply["denial"]["detail"]["rule_pattern"], + "rm -rf*" ); } From 1cad8c1e9a9019301ca184da2fba0ad86f4e61a6 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:01:50 -0300 Subject: [PATCH 18/30] feat(approval-gate): pattern_for(function_id, args) extractor For shell::exec/shell::exec_bg, derive a pattern string from {command, args} so layered rules can match argv-level patterns like 'git status*'. Other function ids default to '*'. Malformed/missing args fall back to '*' (matches only wildcard rules). 8 new tests cover all branches: join+args, single-string command, empty args list, missing command, malformed args, non-shell function id, and the documented space-conflation case. --- approval-gate/src/rules.rs | 100 +++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/approval-gate/src/rules.rs b/approval-gate/src/rules.rs index eb167f63..0cdd7d7c 100644 --- a/approval-gate/src/rules.rs +++ b/approval-gate/src/rules.rs @@ -29,6 +29,7 @@ //! programming so `"a*b*c"` matches `"axxxbxxxc"` correctly. use serde::{Deserialize, Serialize}; +use serde_json::Value; /// Decision a [`Rule`] expresses when it matches an incoming call. /// @@ -119,6 +120,44 @@ where .last() } +/// Per-function pattern extractor. The pattern is the second axis a rule +/// matches on (alongside `function_id`); for `shell::exec` we derive it +/// from `{command, args}` so operators can write rules like +/// `permission: "shell::exec", pattern: "git status*"` and get +/// argv-level granularity. Other function ids default to `"*"`, which +/// matches only wildcard rules. +pub fn pattern_for(function_id: &str, args: &Value) -> String { + match function_id { + "shell::exec" | "shell::exec_bg" => extract_shell_pattern(args), + _ => "*".to_string(), + } +} + +/// Shell ExecRequest is `{ command: String, args: Option> }` +/// per `shell/src/functions/types.rs`. 
There is no `argv` field. Two +/// modes: +/// - `args = None` → `command` is a shell-words string, use as-is. +/// - `args = Some(list)` → join `command + " " + list.join(" ")`. +/// Malformed input (missing/non-string command) falls back to `"*"` so +/// the row matches only wildcard rules. +/// +/// Known conflation: argv `[git, log, "--grep=foo bar"]` joins to +/// `"git log --grep=foo bar"`, same pattern string as +/// `[git, log, "--grep=foo", bar]`. Documented; acceptable for v1. +fn extract_shell_pattern(args: &Value) -> String { + let cmd = args.get("command").and_then(Value::as_str); + let argv = args.get("args").and_then(Value::as_array); + match (cmd, argv) { + (Some(c), Some(arr)) if !arr.is_empty() => { + let mut parts = vec![c.to_string()]; + parts.extend(arr.iter().filter_map(Value::as_str).map(str::to_string)); + parts.join(" ") + } + (Some(c), _) => c.to_string(), + _ => "*".to_string(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -268,4 +307,65 @@ mod tests { assert_eq!(back, a); } } + + // -------------------- pattern_for / extract_shell_pattern -------------------- + + use serde_json::json; + + #[test] + fn pattern_for_shell_exec_joins_command_with_args() { + let pat = pattern_for("shell::exec", &json!({"command": "git", "args": ["status"]})); + assert_eq!(pat, "git status"); + } + + #[test] + fn pattern_for_shell_exec_bg_joins_command_with_args() { + let pat = pattern_for("shell::exec_bg", + &json!({"command": "tail", "args": ["-f", "/var/log/x"]})); + assert_eq!(pat, "tail -f /var/log/x"); + } + + #[test] + fn pattern_for_shell_exec_single_string_command_no_args() { + // shell::exec supports the "command is a shell-words string" mode + // (args: None). The pattern is just the command string. 
+ let pat = pattern_for("shell::exec", &json!({"command": "git status"})); + assert_eq!(pat, "git status"); + } + + #[test] + fn pattern_for_shell_exec_empty_args_list_treated_as_no_args() { + let pat = pattern_for("shell::exec", &json!({"command": "ls", "args": []})); + assert_eq!(pat, "ls"); + } + + #[test] + fn pattern_for_shell_exec_missing_command_falls_back_to_star() { + let pat = pattern_for("shell::exec", &json!({"args": ["foo"]})); + assert_eq!(pat, "*"); + } + + #[test] + fn pattern_for_shell_exec_completely_malformed_args_falls_back_to_star() { + let pat = pattern_for("shell::exec", &json!(null)); + assert_eq!(pat, "*"); + } + + #[test] + fn pattern_for_non_shell_function_id_returns_star() { + let pat = pattern_for("http::fetch", &json!({"url": "https://x"})); + assert_eq!(pat, "*"); + } + + #[test] + fn pattern_for_known_conflation_documented() { + // Documented in spec: an arg containing a space conflates with two + // separate args. This is acceptable for v1. + let with_inner_space = pattern_for("shell::exec", + &json!({"command": "git", "args": ["log", "--grep=foo bar"]})); + let split_args = pattern_for("shell::exec", + &json!({"command": "git", "args": ["log", "--grep=foo", "bar"]})); + assert_eq!(with_inner_space, split_args, + "v1 conflates space-in-arg with arg boundary; see spec"); + } } From 06401ba87a5ff8d8b7616ae6b9890b0e4d5cac2d Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:05:16 -0300 Subject: [PATCH 19/30] feat(approval-gate): StateBus::delete primitive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Required by the new approval::consume RPC (T8), which returns Done rows and deletes them in the same call. IiiStateBus impl maps to the existing state::delete RPC (used elsewhere in the codebase, e.g. auth-credentials). Test fakes (InMemoryStateBus, FailingStateBus) gain matching impls — InMemory removes the key idempotently, Failing errors to match its other primitives. 
--- approval-gate/src/state.rs | 15 +++++++++++++++ approval-gate/tests/common/mod.rs | 10 ++++++++++ 2 files changed, 25 insertions(+) diff --git a/approval-gate/src/state.rs b/approval-gate/src/state.rs index 5eb960e3..0e21e381 100644 --- a/approval-gate/src/state.rs +++ b/approval-gate/src/state.rs @@ -68,6 +68,10 @@ pub trait StateBus: Send + Sync { async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), IIIError>; async fn get(&self, scope: &str, key: &str) -> Option; async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec; + /// Remove a key. Required by `approval::consume`, which returns Done + /// rows and deletes them in the same call. Idempotent (deleting a + /// missing key returns Ok). + async fn delete(&self, scope: &str, key: &str) -> Result<(), IIIError>; } /// Invokes an iii function with arguments and returns its result or an @@ -164,6 +168,17 @@ impl StateBus for IiiStateBus { .map(|entry| entry.get("value").cloned().unwrap_or(entry)) .collect() } + async fn delete(&self, scope: &str, key: &str) -> Result<(), IIIError> { + self.0 + .trigger(TriggerRequest { + function_id: "state::delete".into(), + payload: json!({ "scope": scope, "key": key }), + action: None, + timeout_ms: None, + }) + .await + .map(|_| ()) + } } /// Return the list of function ids whose interceptor asks the gate to diff --git a/approval-gate/tests/common/mod.rs b/approval-gate/tests/common/mod.rs index e72961fe..67d73db7 100644 --- a/approval-gate/tests/common/mod.rs +++ b/approval-gate/tests/common/mod.rs @@ -92,6 +92,13 @@ impl StateBus for InMemoryStateBus { .map(|(_, v)| v.clone()) .collect() } + async fn delete(&self, scope: &str, key: &str) -> Result<(), iii_sdk::IIIError> { + self.store + .lock() + .unwrap() + .remove(&format!("{scope}/{key}")); + Ok(()) + } } /// `StateBus` whose `set` always errors. 
Used to exercise the gate's @@ -114,6 +121,9 @@ impl StateBus for FailingStateBus { async fn list_prefix(&self, _scope: &str, _prefix: &str) -> Vec { Vec::new() } + async fn delete(&self, _scope: &str, _key: &str) -> Result<(), iii_sdk::IIIError> { + Err(iii_sdk::IIIError::Runtime("kv unreachable".into())) + } } /// A canonical `shell::fs::write` call gated by the run's From f5b945c60a1785db1d5018f4b3e7fca4f96c8aca Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:10:41 -0300 Subject: [PATCH 20/30] feat(approval-gate): verdict-driven handle_intercept MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The classifier surface and per-function InterceptorRule decision flow are gone. handle_intercept now reads the verdict via the new lib-level verdict_for(function_id, args, ruleset) helper: Verdict::Allow → {block:false}, no state write Verdict::Deny → {block:true, denial:Policy{rule_permission, rule_pattern}} Verdict::Ask → write Pending row + reply {block:true, status:pending} Replay defense extended for the new 3-state schema: Pending → replay:in_flight, status:pending InFlight → replay:in_flight, status:in_flight (new) Done → replay:already_resolved, status:done intercept.rs drops InterceptAction, decide_intercept_action, ClassifierDecision, interpret_classifier_reply, PolicyOutcome, apply_policy_rules. register.rs's subscriber closure collapses the old policy_outcome+decide_intercept+classifier dance into one handle_intercept call. resolve.rs's cascade loop reuses verdict_for instead of the deleted apply_policy_rules. State-write failure still fails closed with Denial::StateError. 7 new tests cover all four verdict branches + the three replay states + fail-closed behavior. Note: InterceptorRule config struct and intercept_rules variable are still in place for T12 to fully remove. 
--- approval-gate/src/intercept.rs | 548 ++++++++++++--------------------- approval-gate/src/lib.rs | 47 ++- approval-gate/src/register.rs | 100 ++---- approval-gate/src/resolve.rs | 10 +- 4 files changed, 262 insertions(+), 443 deletions(-) diff --git a/approval-gate/src/intercept.rs b/approval-gate/src/intercept.rs index 8ee4906d..5e5d0a24 100644 --- a/approval-gate/src/intercept.rs +++ b/approval-gate/src/intercept.rs @@ -1,240 +1,114 @@ //! Intercept decision flow. //! -//! Pure decision helpers + the async [`handle_intercept`] that writes -//! the pending record. Together they answer the question every hook -//! event triggers: "what should the gate do with this function call?" +//! One async entry point: [`handle_intercept`]. The classifier surface and +//! per-function `InterceptorRule` flow are gone — the layered rules engine +//! (`crate::rules`) is the only policy decision. `handle_intercept` reads +//! the verdict via [`crate::verdict_for`] and writes a `Pending` row when +//! the verdict is `Ask`. Allow/Deny verdicts return synchronous replies +//! and never touch state. //! -//! Three layers run, in order: +//! Replay defense recognises all three persisted states: +//! - `Pending` → reply `{replay:"in_flight", status:"pending"}` +//! - `InFlight` → reply `{replay:"in_flight", status:"in_flight"}` +//! - `Done` → reply `{replay:"already_resolved", status:"done"}` //! -//! 1. **Policy rules** ([`apply_policy_rules`]) — operator-configured -//! layered ruleset. `Allow` and `Deny` short-circuit; `Ask` (and -//! no-match) falls through. -//! 2. **Interceptor rule** ([`decide_intercept_action`]) — per-function -//! config. Decides between `Pass`, `Pause` (no classifier), and -//! `Classify { classifier_fn, … }`. -//! 3. **Classifier reply** ([`interpret_classifier_reply`]) — parses the -//! classifier function's JSON response and maps it back to either an -//! immediate `Auto` (pass), an immediate `Deny`, or `Ask` (fall back -//! 
to user prompt via `handle_intercept`). -//! -//! This module owns only the decision types and `handle_intercept`. The -//! wiring (closure body in `register`) lives in `register.rs`. +//! None of these overwrite the existing row. use serde_json::{json, Value}; -use crate::config::InterceptorRule; -use crate::lifecycle::{build_pending_record, is_terminal_status}; -use crate::rules; +use crate::record::{Record, Status}; +use crate::rules::Ruleset; use crate::state::StateBus; use crate::wire::{pending_key, Denial, IncomingCall}; -/// What the subscriber should do with an incoming call. Decided by the -/// matching interceptor rule (authoritative) with a fallback to the run's -/// `approval_required` list when no rule exists. -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum InterceptAction { - /// No rule, no `approval_required` listing — let the call through. - Pass, - /// Pause and create a pending record; no classifier consulted. - Pause, - /// Run the classifier first; on `ask`, pause; on `auto`, pass; on `deny`, block. - Classify { - classifier_fn: String, - classifier_timeout_ms: u64, - }, -} - -/// Pure decision: given a matching rule (or none) and whether the run -/// explicitly listed this function id in `approval_required`, what should -/// the subscriber do? Interceptor rules are authoritative — an operator -/// who registered a rule meant for every call to go through it, regardless -/// of per-run opt-in. 
-pub(crate) fn decide_intercept_action( - rule: Option<&InterceptorRule>, - requires_approval: bool, -) -> InterceptAction { - match rule { - Some(r) if r.classifier.as_ref().is_some_and(|s| !s.is_empty()) => { - InterceptAction::Classify { - classifier_fn: r.classifier.clone().unwrap(), - classifier_timeout_ms: r.classifier_timeout_ms, - } - } - Some(_) => InterceptAction::Pause, - None if requires_approval => InterceptAction::Pause, - None => InterceptAction::Pass, - } -} - -/// Outcome of the policy-rules pre-check that runs before the per-function -/// [`InterceptorRule`] flow. `Allow` and `Deny` short-circuit the -/// subscriber with a final reply; `FallThrough` defers to the existing -/// interceptor logic (classifier or pause). -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum PolicyOutcome { - Allow, - Deny { - rule_permission: String, - rule_pattern: String, - }, - FallThrough, -} - -/// Apply the layered policy rules to an incoming function id. Pure -/// function — no I/O, no clock. Extracted from the subscriber closure -/// so the decision branch can be unit-tested independently. -pub(crate) fn apply_policy_rules( - rules: &rules::Ruleset, - function_id: &str, -) -> PolicyOutcome { - match rules::evaluate(function_id, "*", rules) { - Some(rule) => match rule.action { - rules::Action::Allow => PolicyOutcome::Allow, - rules::Action::Deny => PolicyOutcome::Deny { - rule_permission: rule.permission.clone(), - rule_pattern: rule.pattern.clone(), - }, - rules::Action::Ask => PolicyOutcome::FallThrough, - }, - None => PolicyOutcome::FallThrough, - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum ClassifierDecision { - Auto, - Deny(Denial), - Ask, -} - -/// Parse classifier JSON (`decision` tag: auto | deny | ask). On `deny` -/// the reply may carry `reason` (free-form classifier text); both that -/// and the calling `classifier_fn` get folded into a [`Denial::Policy`]. 
-pub(crate) fn interpret_classifier_reply( - value: &Value, - classifier_fn: &str, -) -> Result { - let tag = value.get("decision").and_then(Value::as_str).ok_or(())?; - match tag { - "auto" => Ok(ClassifierDecision::Auto), - "deny" => { - let classifier_reason = value - .get("reason") - .and_then(Value::as_str) - .unwrap_or("denied") - .to_string(); - // Transient mapping: classifier reason/fn stored in the renamed - // Denial::Policy fields. The whole classifier surface is deleted - // in T5; for now this just keeps the build green. - Ok(ClassifierDecision::Deny(Denial::Policy { - rule_permission: classifier_fn.to_string(), - rule_pattern: classifier_reason, - })) - } - "ask" => Ok(ClassifierDecision::Ask), - _ => Err(()), - } -} - -/// Decide whether a call is gated; if so, write a pending record and return -/// the structured pending hook reply. If not gated, return `{block: false}` -/// and do nothing. -/// -/// Stamps `session_id` onto the persisted record so the timeout sweeper can -/// emit `approval_resolved` to the right session stream without consulting -/// the storage layer's keys. -/// -/// State-write failure is treated as fail-closed: the gate replies -/// `{block:true, status:"denied"}` so a transient kv outage cannot silently +/// Subscriber-side entry point. Decides via `verdict_for` (rules layer); +/// on Ask, persists a Pending record. State-write failure fails closed +/// with `Denial::StateError` so a transient kv outage cannot silently /// bypass an approval check. pub async fn handle_intercept( bus: &dyn StateBus, state_scope: &str, call: &IncomingCall, + rules: &Ruleset, now_ms: u64, timeout_ms: u64, - force_pending: bool, ) -> Value { - if !force_pending && !call.requires_approval() { - return json!({ "block": false }); - } - - // Defense in depth: if a record for this (session, call_id) already - // exists, don't blow it away. 
Re-intercept of an already-decided call - // would otherwise revert a terminal record back to `pending`, losing - // the audit trail and any `delivered_in_turn_id` stamp. - let key = pending_key(&call.session_id, &call.function_call_id); - if let Some(existing) = bus.get(state_scope, &key).await { - let status = existing - .get("status") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - if is_terminal_status(&status) { - // Replay of an already-resolved call: the prior status carries - // the meaning. No fresh Denial is synthesized — consumers that - // need to render the historical decision read the persisted - // record via approval::lookup_record. + // 1. Rules pre-check. + match crate::verdict_for(&call.function_id, &call.args, rules) { + crate::Verdict::Allow => return json!({ "block": false }), + crate::Verdict::Deny(denial) => { return json!({ "block": true, - "status": status, - "replay": "already_resolved", + "status": "denied", + "denial": denial, "call_id": call.function_call_id, "function_id": call.function_id, }); } - if status == "pending" || status == "approved" { - // Replay of an in-flight intercept — keep the existing row, - // re-emit the pending reply. No state churn. - return json!({ - "block": true, - "status": "pending", - "replay": "in_flight", - "call_id": call.function_call_id, - "function_id": call.function_id, - }); + crate::Verdict::Ask => { /* fall through */ } + } + + // 2. Replay defense — never overwrite an existing row. 
+ let key = pending_key(&call.session_id, &call.function_call_id); + if let Some(existing_raw) = bus.get(state_scope, &key).await { + if let Some(existing) = Record::from_value(existing_raw) { + return match existing.status { + Status::Done => json!({ + "block": true, + "status": "done", + "replay": "already_resolved", + "call_id": call.function_call_id, + "function_id": call.function_id, + }), + Status::Pending => json!({ + "block": true, + "status": "pending", + "replay": "in_flight", + "call_id": call.function_call_id, + "function_id": call.function_id, + }), + Status::InFlight => json!({ + "block": true, + "status": "in_flight", + "replay": "in_flight", + "call_id": call.function_call_id, + "function_id": call.function_id, + }), + }; } + // Malformed row → fall through and overwrite (defensive). } - let mut record = build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, + // 3. Fresh Pending write. + let record = Record::pending( + call.function_call_id.clone(), + call.function_id.clone(), + call.args.clone(), + call.session_id.clone(), now_ms, timeout_ms, ); - if let Some(obj) = record.as_object_mut() { - obj.insert("session_id".into(), Value::String(call.session_id.clone())); - } - if let Err(err) = bus - .set( - state_scope, - &pending_key(&call.session_id, &call.function_call_id), - record, - ) - .await - { + if let Err(err) = bus.set(state_scope, &key, record.to_value()).await { tracing::error!( "approval-gate: failed to write pending record for {}/{}: {err} — failing closed", - call.session_id, - call.function_call_id + call.session_id, call.function_call_id, ); let denial = Denial::StateError { phase: "intercept_write_pending".to_string(), error: err.to_string(), }; return json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, + "block": true, + "denial": denial, + "status": "denied", + "call_id": call.function_call_id, "function_id": call.function_id, }); } json!({ - "block": 
true, - "status": "pending", - "call_id": call.function_call_id, + "block": true, + "status": "pending", + "call_id": call.function_call_id, "function_id": call.function_id, }) } @@ -242,178 +116,158 @@ pub async fn handle_intercept( #[cfg(test)] mod tests { use super::*; + use crate::record::{Outcome, Record}; + use crate::rules::{Action, Rule}; use serde_json::json; + use std::sync::Mutex; - #[test] - fn interpret_classifier_reply_reads_decision_tags() { - assert!(matches!( - interpret_classifier_reply(&json!({"decision": "auto"}), "shell::classify_argv"), - Ok(ClassifierDecision::Auto) - )); - match interpret_classifier_reply( - &json!({"decision":"deny","reason":"nope"}), - "shell::classify_argv", - ) { - Ok(ClassifierDecision::Deny(Denial::Policy { - rule_permission, - rule_pattern, - })) => { - // Per the transient mapping in interpret_classifier_reply. - assert_eq!(rule_pattern, "nope"); - assert_eq!(rule_permission, "shell::classify_argv"); - } - o => panic!("expected Policy denial {:?}", o), + #[derive(Default)] + struct InMemBus { + rows: Mutex>, + } + #[async_trait::async_trait] + impl StateBus for InMemBus { + async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError> { + self.rows.lock().unwrap().insert((scope.into(), key.into()), value); + Ok(()) + } + async fn get(&self, scope: &str, key: &str) -> Option { + self.rows.lock().unwrap().get(&(scope.into(), key.into())).cloned() + } + async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { + self.rows.lock().unwrap() + .iter() + .filter(|((s, k), _)| s == scope && k.starts_with(prefix)) + .map(|(_, v)| v.clone()) + .collect() + } + async fn delete(&self, scope: &str, key: &str) -> Result<(), iii_sdk::IIIError> { + self.rows.lock().unwrap().remove(&(scope.into(), key.into())); + Ok(()) } - assert!(matches!( - interpret_classifier_reply( - &json!({"decision":"ask","summary":"x"}), - "shell::classify_argv" - ), - Ok(ClassifierDecision::Ask) - )); - 
assert!(interpret_classifier_reply(&json!({}), "shell::classify_argv").is_err()); } - /// An operator-registered rule is authoritative: every call to that - /// function id runs through the classifier, even when the run's - /// `approval_required` list is empty. - #[test] - fn decide_intercept_action_classifies_when_rule_has_classifier_regardless_of_approval_required( - ) { - let rule = InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some("shell::classify_argv".into()), - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }; - let action = decide_intercept_action(Some(&rule), false); - assert_eq!( - action, - InterceptAction::Classify { - classifier_fn: "shell::classify_argv".into(), - classifier_timeout_ms: 2000, - } - ); - assert_eq!(action, decide_intercept_action(Some(&rule), true)); + fn call(fc_id: &str, fn_id: &str, args: Value) -> IncomingCall { + IncomingCall { + session_id: "sess_a".into(), + function_call_id: fc_id.into(), + function_id: fn_id.into(), + args, + approval_required: Vec::new(), + event_id: "evt-1".into(), + reply_stream: "hk-1".into(), + } } - #[test] - fn decide_intercept_action_pauses_when_rule_has_no_classifier_regardless_of_approval_required() - { - let rule = InterceptorRule { - function_id: "shell::fs::write".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }; - assert_eq!( - decide_intercept_action(Some(&rule), false), - InterceptAction::Pause - ); - assert_eq!( - decide_intercept_action(Some(&rule), true), - InterceptAction::Pause - ); + #[tokio::test] + async fn allow_rule_returns_block_false_no_state_write() { + let bus = InMemBus::default(); + let rs: Ruleset = vec![Rule { + permission: "shell::exec".into(), + pattern: "git status*".into(), + action: Action::Allow, + }]; + let c = call("tc-1", "shell::exec", json!({"command": "git", "args": ["status"]})); + let reply = 
handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["block"], false); + assert!(bus.list_prefix("approvals", "sess_a/").await.is_empty()); } - #[test] - fn decide_intercept_action_pauses_when_no_rule_but_run_listed_approval_required() { - assert_eq!(decide_intercept_action(None, true), InterceptAction::Pause); + #[tokio::test] + async fn deny_rule_returns_block_true_structured_policy_denial() { + let bus = InMemBus::default(); + let rs: Ruleset = vec![Rule { + permission: "shell::exec".into(), + pattern: "rm -rf*".into(), + action: Action::Deny, + }]; + let c = call("tc-1", "shell::exec", json!({"command": "rm", "args": ["-rf", "/"]})); + let reply = handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["block"], true); + assert_eq!(reply["denial"]["kind"], "policy"); + assert_eq!(reply["denial"]["detail"]["rule_permission"], "shell::exec"); + assert_eq!(reply["denial"]["detail"]["rule_pattern"], "rm -rf*"); + assert!(bus.list_prefix("approvals", "sess_a/").await.is_empty()); } - #[test] - fn decide_intercept_action_passes_when_no_rule_and_not_approval_required() { - assert_eq!(decide_intercept_action(None, false), InterceptAction::Pass); + #[tokio::test] + async fn no_match_defaults_to_ask_writes_pending() { + let bus = InMemBus::default(); + let rs: Ruleset = vec![]; + let c = call("tc-1", "shell::exec", json!({"command": "git", "args": ["push"]})); + let reply = handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["block"], true); + assert_eq!(reply["status"], "pending"); + let stored = bus.get("approvals", "sess_a/tc-1").await.expect("pending row"); + let r = Record::from_value(stored).unwrap(); + assert_eq!(r.status, Status::Pending); } - #[test] - fn decide_intercept_action_classifier_empty_string_treated_as_no_classifier() { - let rule = InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some(String::new()), - classifier_timeout_ms: 2000, - 
inject_approval_marker: false, - marker_target_verified: false, - }; - assert_eq!( - decide_intercept_action(Some(&rule), false), - InterceptAction::Pause - ); - } + #[tokio::test] + async fn replay_on_pending_returns_in_flight_no_state_churn() { + let bus = InMemBus::default(); + let rs: Ruleset = vec![]; + let c = call("tc-1", "shell::exec", json!({"command": "ls"})); - #[test] - fn apply_policy_rules_empty_ruleset_falls_through() { - let rs: rules::Ruleset = vec![]; - assert_eq!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::FallThrough - ); - } + handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + let r2 = handle_intercept(&bus, "approvals", &c, &rs, 2_000, 60_000).await; + assert_eq!(r2["replay"], "in_flight"); + assert_eq!(r2["status"], "pending"); - #[test] - fn apply_policy_rules_allow_short_circuits() { - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Allow, - }]; - assert_eq!(apply_policy_rules(&rs, "shell::exec"), PolicyOutcome::Allow); + let r = Record::from_value(bus.get("approvals", "sess_a/tc-1").await.unwrap()).unwrap(); + assert_eq!(r.expires_at, 61_000, "second call must NOT have re-written expires_at"); } - #[test] - fn apply_policy_rules_deny_carries_matched_rule_identity() { - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::*".into(), - pattern: "*".into(), - action: rules::Action::Deny, - }]; - assert_eq!( - apply_policy_rules(&rs, "shell::fs::write"), - PolicyOutcome::Deny { - rule_permission: "shell::*".into(), - rule_pattern: "*".into(), - } - ); + #[tokio::test] + async fn replay_on_in_flight_returns_in_flight_marker() { + let bus = InMemBus::default(); + let in_flight = Record::pending( + "tc-1".into(), "shell::exec".into(), json!({}), + "sess_a".into(), 0, 60_000, + ).in_flight(500); + bus.set("approvals", "sess_a/tc-1", in_flight.to_value()).await.unwrap(); + + let rs: Ruleset = vec![]; + let c = call("tc-1", 
"shell::exec", json!({})); + let reply = handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["replay"], "in_flight"); + assert_eq!(reply["status"], "in_flight"); } - #[test] - fn apply_policy_rules_ask_falls_through_to_interceptor_flow() { - // Ask means "no decision from this layer — let the next handle it". - let rs: rules::Ruleset = vec![rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Ask, - }]; - assert_eq!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::FallThrough - ); + #[tokio::test] + async fn replay_on_done_returns_already_resolved_marker() { + let bus = InMemBus::default(); + let done = Record::pending( + "tc-1".into(), "shell::exec".into(), json!({}), + "sess_a".into(), 0, 60_000, + ).in_flight(500).done(Outcome::Executed { result: json!({"ok": true}) }); + bus.set("approvals", "sess_a/tc-1", done.to_value()).await.unwrap(); + + let rs: Ruleset = vec![]; + let c = call("tc-1", "shell::exec", json!({})); + let reply = handle_intercept(&bus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["replay"], "already_resolved"); + assert_eq!(reply["status"], "done"); } - #[test] - fn apply_policy_rules_last_matching_wins() { - // Later-listed more-specific rule overrides earlier permissive default. - let rs: rules::Ruleset = vec![ - rules::Rule { - permission: "*".into(), - pattern: "*".into(), - action: rules::Action::Allow, - }, - rules::Rule { - permission: "shell::exec".into(), - pattern: "*".into(), - action: rules::Action::Deny, - }, - ]; - assert!(matches!( - apply_policy_rules(&rs, "shell::exec"), - PolicyOutcome::Deny { .. 
} - )); - assert_eq!( - apply_policy_rules(&rs, "approval::resolve"), - PolicyOutcome::Allow - ); + #[tokio::test] + async fn state_write_failure_fails_closed_with_state_error_denial() { + struct FailBus; + #[async_trait::async_trait] + impl StateBus for FailBus { + async fn set(&self, _: &str, _: &str, _: Value) -> Result<(), iii_sdk::IIIError> { + Err(iii_sdk::IIIError::Runtime("kv down".into())) + } + async fn get(&self, _: &str, _: &str) -> Option { None } + async fn list_prefix(&self, _: &str, _: &str) -> Vec { Vec::new() } + async fn delete(&self, _: &str, _: &str) -> Result<(), iii_sdk::IIIError> { Ok(()) } + } + let rs: Ruleset = vec![]; + let c = call("tc-1", "shell::exec", json!({})); + let reply = handle_intercept(&FailBus, "approvals", &c, &rs, 1_000, 60_000).await; + assert_eq!(reply["block"], true); + assert_eq!(reply["status"], "denied"); + assert_eq!(reply["denial"]["kind"], "state_error"); + assert_eq!(reply["denial"]["detail"]["phase"], "intercept_write_pending"); } } diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 7091217e..27dec256 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -1,6 +1,8 @@ -//! Approval gate. Subscribes to `agent::before_function_call` and blocks calls -//! whose `function_call.function_id` appears in the run's `approval_required` list, -//! waiting for the UI to call `approval::resolve` (or for a timeout). +//! Approval gate. Subscribes to `agent::before_function_call` and decides +//! every call via the layered rules engine (`rules::evaluate`). Allow → +//! `{block:false}`. Deny → `{block:true, denial:Policy{rule_permission, +//! rule_pattern}}`. Ask → write a Pending record and wait for +//! `approval::resolve`. 
pub mod config; pub mod delivery; @@ -34,13 +36,38 @@ pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; -// Test-only re-imports so the inline `mod tests` below keeps working -// without an unreasonable churn pass over its assertions. -#[cfg(test)] -use intercept::{ - apply_policy_rules, decide_intercept_action, interpret_classifier_reply, ClassifierDecision, - InterceptAction, PolicyOutcome, -}; +/// Subscriber's terminal verdict for an incoming call. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum Verdict { + Allow, + Deny(Denial), + Ask, +} + +/// Apply the layered rules to an incoming call. Last-matching rule wins; +/// no match defaults to Ask (operator-safe default — paired with the +/// curated default ruleset shipped in `iii.worker.yaml`). +pub(crate) fn verdict_for( + function_id: &str, + args: &serde_json::Value, + rules: &rules::Ruleset, +) -> Verdict { + let pattern = rules::pattern_for(function_id, args); + match rules::evaluate(function_id, &pattern, rules) { + Some(r) => match r.action { + rules::Action::Allow => Verdict::Allow, + rules::Action::Deny => Verdict::Deny(Denial::Policy { + rule_permission: r.permission.clone(), + rule_pattern: r.pattern.clone(), + }), + rules::Action::Ask => Verdict::Ask, + }, + None => Verdict::Ask, + } +} + +// Test-only re-imports kept as small as possible. Helpers below this line +// will be deleted as their owning modules are rewritten in later tasks. 
#[cfg(test)] use state::{merge_from_approval_marker_if_needed, rule_for}; #[cfg(test)] diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs index e26b9d3a..9adc88c8 100644 --- a/approval-gate/src/register.rs +++ b/approval-gate/src/register.rs @@ -24,10 +24,7 @@ use crate::delivery::{ handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, handle_list_undelivered, handle_sweep_session, }; -use crate::intercept::{ - apply_policy_rules, decide_intercept_action, handle_intercept, interpret_classifier_reply, - ClassifierDecision, InterceptAction, PolicyOutcome, -}; +use crate::intercept::handle_intercept; use crate::resolve::{handle_lookup_record, handle_resolve}; use crate::rules; use crate::state::{ @@ -349,89 +346,28 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { .map(|d| d.as_millis() as u64) .unwrap_or(0); - // Layered policy rules run first. Allow / Deny short-circuit; - // Ask (and no-match) falls through to the existing per-function - // interceptor flow. Pattern is "*" in v1 — see `crate::rules`. - // Read-lock is acquired and dropped inside a block so the - // guard never crosses an `.await` (std::sync::RwLock is not - // async-safe to hold across suspension points). - let policy_outcome = { + // Take a snapshot of the rules ruleset under the read lock, + // then drop the guard before any .await. std::sync::RwLock + // is not async-safe to hold across suspension points, and + // a held guard would block every concurrent intercept. 
+ let rules_snapshot: rules::Ruleset = { let guard = policy_rules .read() .expect("approval-gate policy rules lock poisoned"); - apply_policy_rules(&guard, &call.function_id) + guard.clone() }; - match policy_outcome { - PolicyOutcome::Allow => { - return Ok::<_, IIIError>(json!({ "block": false })); - } - PolicyOutcome::Deny { - rule_permission, - rule_pattern, - } => { - let denial = Denial::Policy { - rule_permission, - rule_pattern, - }; - return Ok::<_, IIIError>(json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, - "function_id": call.function_id, - })); - } - PolicyOutcome::FallThrough => {} - } - let action = decide_intercept_action( - rule_for(intercept_rules.as_slice(), &call.function_id), - call.requires_approval(), - ); - let reply = match action { - InterceptAction::Pass => json!({ "block": false }), - InterceptAction::Pause => { - handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, false).await - } - InterceptAction::Classify { - classifier_fn, - classifier_timeout_ms, - } => match iii - .trigger(TriggerRequest { - function_id: classifier_fn.clone(), - payload: call.args.clone(), - action: None, - timeout_ms: Some(classifier_timeout_ms), - }) - .await - { - Ok(v) => match interpret_classifier_reply(&v, &classifier_fn) { - Ok(ClassifierDecision::Auto) => json!({ "block": false }), - Ok(ClassifierDecision::Deny(denial)) => json!({ - "block": true, - "denial": denial, - "status": "denied", - "call_id": call.function_call_id, - "function_id": call.function_id, - }), - Ok(ClassifierDecision::Ask) | Err(()) => { - handle_intercept( - bus.as_ref(), - &sc, - &call, - now_ms, - timeout_ms, - true, - ) - .await - } - }, - Err(_) => { - handle_intercept(bus.as_ref(), &sc, &call, now_ms, timeout_ms, true) - .await - } - }, - }; + // One decision call. Verdict::Allow → {block:false}. + // Verdict::Deny → {block:true, denial:Policy{...}}. 
+ // Verdict::Ask → write Pending + reply {block:true, status:pending}. + let reply = handle_intercept( + bus.as_ref(), + &sc, + &call, + &rules_snapshot, + now_ms, + timeout_ms, + ).await; if reply.get("status").and_then(Value::as_str) == Some("pending") { write_event( diff --git a/approval-gate/src/resolve.rs b/approval-gate/src/resolve.rs index 7506bcd3..17b81f68 100644 --- a/approval-gate/src/resolve.rs +++ b/approval-gate/src/resolve.rs @@ -11,7 +11,8 @@ use std::sync::RwLock; use serde_json::{json, Value}; -use crate::intercept::{apply_policy_rules, PolicyOutcome}; +// apply_policy_rules / PolicyOutcome were deleted in T5. The cascade loop +// below uses crate::verdict_for instead. T7 rewrites this entirely. use crate::lifecycle::{maybe_flip_timed_out, transition_record}; use crate::rules; use crate::state::{FunctionExecutor, StateBus}; @@ -214,13 +215,14 @@ async fn cascade_allow_for_session( .and_then(Value::as_str) .unwrap_or("") .to_string(); - let outcome = { + let args = rec.get("args").cloned().unwrap_or(json!({})); + let verdict = { let guard = policy_rules .read() .expect("approval-gate policy rules lock poisoned"); - apply_policy_rules(&guard, &fn_id) + crate::verdict_for(&fn_id, &args, &guard) }; - if !matches!(outcome, PolicyOutcome::Allow) { + if !matches!(verdict, crate::Verdict::Allow) { continue; } if let Err(err) = From d16379418cbd6248456b2dca730d3bc20764c09d Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:12:27 -0300 Subject: [PATCH 21/30] feat(approval-gate): three-phase resolve + cascade exact-pattern (T6+T7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handle_resolve rewritten to typed Record + three-phase allow path: 1. write InFlight (closes the dup-exec race within a worker process) 2. iii.trigger(function_id, args) and await 3. write Done(Executed{result}) or Done(Failed{error}) Deny is a single Pending → Done(Denied{denial}) write — no invoke, no InFlight needed. 
WireDecision::Deny with no denial in payload defaults to UserRejected. UserCorrected{feedback} round-trips. Dup-exec guard: handle_resolve refuses non-Pending rows with in_flight (concurrent resolve mid-invoke) or already_resolved (terminal). Cascade allow on always:true (T7) now pushes a runtime Allow rule with the originator's EXACT pattern via rules::pattern_for(args) — not the prior blanket pattern:'*'. 'Always allow git status' no longer auto-allows rm -rf / via the same shell::exec function id. Lock-ordering invariant pinned in module docs: never hold the ruleset guard across .await. --- approval-gate/src/resolve.rs | 292 ++++++++++++++++------------------- 1 file changed, 135 insertions(+), 157 deletions(-) diff --git a/approval-gate/src/resolve.rs b/approval-gate/src/resolve.rs index 17b81f68..56d1d90a 100644 --- a/approval-gate/src/resolve.rs +++ b/approval-gate/src/resolve.rs @@ -1,33 +1,42 @@ //! Resolve flow — handles `approval::resolve` and the cascading-allow //! behavior that fires when a reply carries `always: true`. //! -//! [`handle_resolve`] is the main entry point. On allow it routes -//! through [`approve_and_execute`], which is also reused by the cascade -//! sweep ([`cascade_allow_for_session`]) so the approved → invoke → -//! executed/failed transitions stay in one place. [`handle_lookup_record`] -//! is the small read-only helper called by shell bypass validation. +//! ## Three-phase allow path +//! +//! [`handle_resolve`] is the entry point. On allow it routes through +//! [`approve_and_execute`]: +//! 1. write `InFlight` (closes the dup-exec race — a second resolve +//! arriving during the invoke await sees a non-Pending row and bails); +//! 2. `iii.trigger(function_id, args)` and await; +//! 3. write `Done(Executed{result})` or `Done(Failed{error})`. +//! +//! Deny is a single Pending → Done(Denied) write — no invoke, no InFlight. +//! +//! ## Cascade +//! +//! On `allow + always:true`, [`cascade_allow_for_session`] pushes a runtime +//! 
`Allow` rule with the originator's **exact pattern** (via +//! [`crate::rules::pattern_for`]) — not a blanket `pattern: "*"`. "Always +//! allow git status" does NOT auto-allow `rm -rf /` via the same +//! `shell::exec` function id. Same-session pending rows whose +//! `verdict_for` returns `Allow` under the new rule are driven through +//! `approve_and_execute`. use std::sync::RwLock; use serde_json::{json, Value}; -// apply_policy_rules / PolicyOutcome were deleted in T5. The cascade loop -// below uses crate::verdict_for instead. T7 rewrites this entirely. -use crate::lifecycle::{maybe_flip_timed_out, transition_record}; -use crate::rules; +use crate::record::{Record, Status, Outcome}; +use crate::rules::{self, Action, Rule, Ruleset}; use crate::state::{FunctionExecutor, StateBus}; use crate::wire::{pending_key, Denial, WireDecision}; -/// Lookup a single approval record by session + call id (for shell bypass validation). +/// Lookup a single approval record by session + call id (for shell bypass +/// validation). Stays on the old free-form Value shape so shell-side +/// readers don't break — shell strip in T13 deletes the callsite there. pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let function_call_id = payload - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); + let session_id = payload.get("session_id").and_then(Value::as_str).unwrap_or(""); + let function_call_id = payload.get("function_call_id").and_then(Value::as_str).unwrap_or(""); if session_id.is_empty() || function_call_id.is_empty() { return Value::Null; } @@ -35,22 +44,19 @@ pub async fn handle_lookup_record(bus: &dyn StateBus, state_scope: &str, payload bus.get(state_scope, &key).await.unwrap_or(Value::Null) } -/// Resolve a pending approval. Wire-format errors return `{ok: false, -/// error: ""}`. 
Success returns `{ok: true}` plus an optional -/// `cascaded: N` count when an `always: true` reply triggered the +/// Resolve a pending approval. Wire-format errors return +/// `{ok:false, error:""}`. Success returns `{ok:true}` plus an +/// optional `cascaded: N` count when an `always:true` reply triggered the /// session sweep. pub async fn handle_resolve( bus: &dyn StateBus, exec: &dyn FunctionExecutor, state_scope: &str, - policy_rules: &RwLock, + policy_rules: &RwLock, payload: Value, now_ms: u64, ) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); + let session_id = payload.get("session_id").and_then(Value::as_str).unwrap_or(""); let function_call_id = payload .get("function_call_id") .or_else(|| payload.get("tool_call_id")) @@ -59,6 +65,7 @@ pub async fn handle_resolve( if session_id.is_empty() || function_call_id.is_empty() { return json!({ "ok": false, "error": "missing_id" }); } + let decision: WireDecision = match payload.get("decision").cloned() { Some(v) => match serde_json::from_value(v) { Ok(d) => d, @@ -66,33 +73,33 @@ pub async fn handle_resolve( }, None => return json!({ "ok": false, "error": "bad_decision" }), }; + let key = pending_key(session_id, function_call_id); - let Some(existing) = bus.get(state_scope, &key).await else { + let Some(raw) = bus.get(state_scope, &key).await else { return json!({ "ok": false, "error": "not_found" }); }; - - // Lazy timeout flip: if the record is past expires_at, write the - // timed_out transition and refuse the resolve so the caller can't - // race the sweeper. 
- let existing = match maybe_flip_timed_out(&existing, now_ms) { - Some(flipped) => { - let _ = bus.set(state_scope, &key, flipped.clone()).await; - return json!({ "ok": false, "error": "timed_out" }); - } - None => existing, + let Some(record) = Record::from_value(raw) else { + return json!({ "ok": false, "error": "corrupt_record" }); }; - if existing.get("status").and_then(Value::as_str) != Some("pending") { - return json!({ "ok": false, "error": "already_resolved" }); + // Lazy timeout flip — Pending rows past expires_at flip to + // Done(TimedOut) on read. + if let Some(flipped) = record.flipped_to_timed_out_if_expired(now_ms) { + let _ = bus.set(state_scope, &key, flipped.to_value()).await; + return json!({ "ok": false, "error": "timed_out" }); + } + + // Dup-exec guard: only Pending rows are resolvable. InFlight means a + // concurrent resolve is still mid-invoke; Done means terminal. + match record.status { + Status::Pending => { /* fall through */ } + Status::InFlight => return json!({ "ok": false, "error": "in_flight" }), + Status::Done => return json!({ "ok": false, "error": "already_resolved" }), } match decision { WireDecision::Deny => { - // Caller supplies a structured Denial. Accepted shapes: - // { "decision": "deny", "denial": { "kind": "user_rejected", ... } } - // { "decision": "deny", "denial": { "kind": "user_corrected", "detail": { "feedback": "..." } } } - // Missing `denial` is treated as a bare UserRejected (no feedback) - // so the simplest UI flow stays one-click. + // Optional structured denial from caller; missing → UserRejected. 
let denial = match payload.get("denial").cloned() { Some(v) => match serde_json::from_value::(v) { Ok(d) => d, @@ -100,54 +107,36 @@ pub async fn handle_resolve( }, None => Denial::UserRejected, }; - let denied = transition_record(&existing, "denied", None, None, Some(denial)); - if let Err(e) = bus.set(state_scope, &key, denied).await { + let denied = record.done_at(now_ms, Outcome::Denied { denial }); + if let Err(e) = bus.set(state_scope, &key, denied.to_value()).await { tracing::error!("approval-gate: failed to write denied record: {e}"); return json!({ "ok": false, "error": "state_write_failed" }); } json!({ "ok": true }) } WireDecision::Allow => { + // Snapshot args + function_id before consuming `record` in + // approve_and_execute — cascade needs them for the rule push. + let function_id = record.function_id.clone(); + let args = record.args.clone(); + if let Err(err) = approve_and_execute( - bus, - exec, - state_scope, - &existing, - session_id, - function_call_id, - ) - .await - { + bus, exec, state_scope, record, session_id, function_call_id, now_ms, + ).await { tracing::error!("approval-gate: failed to execute approved call: {err}"); return json!({ "ok": false, "error": "state_write_failed" }); } - // Optional cascade: when `always: true` is set on an allow - // reply, add a runtime Allow rule for this call's function id - // and resolve every other pending record in the same session - // that the new rule covers. v1 scope is function-id-only — - // the cascade rule's `pattern` is "*" to match the v1 rules - // surface. See [`crate::rules`]. - let cascaded = if payload - .get("always") - .and_then(Value::as_bool) - .unwrap_or(false) - { - let function_id = existing - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); + // Cascade on `always:true`. Push a runtime Allow rule with the + // ORIGINATOR'S EXACT PATTERN (via pattern_for), then sweep the + // session's other Pending rows. 
+ let cascaded = if payload.get("always").and_then(Value::as_bool).unwrap_or(false) { cascade_allow_for_session( - bus, - exec, - state_scope, - policy_rules, - session_id, - function_call_id, - &function_id, - ) - .await + bus, exec, state_scope, policy_rules, + session_id, function_call_id, + &function_id, &args, + now_ms, + ).await } else { 0 }; @@ -161,77 +150,72 @@ pub async fn handle_resolve( } } -/// Push an Allow rule for `function_id` into the shared policy ruleset, -/// then resolve every pending record in `session_id` (other than the one -/// just resolved by the caller) that the new rule covers. Returns the -/// number of records auto-resolved. +/// Push an exact-pattern Allow rule into the shared ruleset, then sweep +/// the session's other Pending rows. Returns the number of rows +/// auto-resolved (originator excluded). /// -/// The function id rule is appended once; if the user clicks "always -/// allow X" twice for the same X within a session, the second push is a -/// duplicate but harmless (last-wins still picks Allow). State-write -/// failures inside the loop are logged and skipped so a single bad -/// record can't prevent the rest of the cascade. +/// **Lock-ordering invariant**: the write/read guards on `policy_rules` +/// are released before any `.await`. `std::sync::RwLock` is not async-safe +/// to hold across suspension; a held guard would block every concurrent +/// intercept. async fn cascade_allow_for_session( bus: &dyn StateBus, exec: &dyn FunctionExecutor, state_scope: &str, - policy_rules: &RwLock, + policy_rules: &RwLock, session_id: &str, originator_call_id: &str, originator_function_id: &str, + originator_args: &Value, + now_ms: u64, ) -> u64 { - // Push the new Allow rule under the write lock. Hold the guard only - // for the mutation, not across the .await in the sweep below. + // 1. Push the exact-pattern Allow rule under the write lock. 
+ // pattern_for is the same extractor used at intercept time, so + // "always allow git status" means literally that argv shape — NOT + // a blanket "*" pattern that would auto-allow rm -rf /. + let pushed_pattern = rules::pattern_for(originator_function_id, originator_args); { let mut guard = policy_rules .write() .expect("approval-gate policy rules lock poisoned"); - guard.push(rules::Rule { + guard.push(Rule { permission: originator_function_id.to_string(), - pattern: "*".to_string(), - action: rules::Action::Allow, + pattern: pushed_pattern, + action: Action::Allow, }); } - // Snapshot the session's pending records and re-evaluate each one - // against the now-updated rules. Use a read-clone so we don't hold - // the lock across .await. + // 2. Snapshot the session's pending rows. let prefix = format!("{session_id}/"); - let session_records = bus.list_prefix(state_scope, &prefix).await; + let session_rows = bus.list_prefix(state_scope, &prefix).await; + let mut cascaded = 0u64; - for rec in session_records { - let rec_call_id = match rec.get("function_call_id").and_then(Value::as_str) { - Some(s) => s.to_string(), - None => continue, - }; - if rec_call_id == originator_call_id { - continue; - } - if rec.get("status").and_then(Value::as_str) != Some("pending") { - continue; - } - let fn_id = rec - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - let args = rec.get("args").cloned().unwrap_or(json!({})); + for raw in session_rows { + let Some(record) = Record::from_value(raw) else { continue }; + if record.session_id != session_id { continue; } // defensive + if record.function_call_id == originator_call_id { continue; } // skip originator + if record.status != Status::Pending { continue; } // skip non-pending + + // 3. Re-evaluate against the updated ruleset. 
let verdict = { let guard = policy_rules .read() .expect("approval-gate policy rules lock poisoned"); - crate::verdict_for(&fn_id, &args, &guard) + crate::verdict_for(&record.function_id, &record.args, &guard) }; if !matches!(verdict, crate::Verdict::Allow) { continue; } - if let Err(err) = - approve_and_execute(bus, exec, state_scope, &rec, session_id, &rec_call_id).await - { + + // 4. Drive through the same approve_and_execute path as the + // user-driven allow (InFlight → invoke → Done). + let cid = record.function_call_id.clone(); + if let Err(err) = approve_and_execute( + bus, exec, state_scope, record, session_id, &cid, now_ms, + ).await { tracing::warn!( - session_id, - call_id = %rec_call_id, - "approval-gate: cascade auto-resolve failed: {err}" + session_id, call_id = %cid, + "approval-gate: cascade auto-resolve failed: {err}", ); continue; } @@ -240,50 +224,44 @@ async fn cascade_allow_for_session( cascaded } -/// Drive a pending record through the approved → invoke → executed/failed -/// flow. Pure plumbing — does not consult policy rules, does not check -/// the original status (caller must have verified it's pending). Used by -/// both the user-driven [`handle_resolve`] allow path and the -/// cascade-on-`always` sweep so the state transitions stay in one place. +/// Drive a Pending row through InFlight → invoke → Done. Used by both +/// the user-driven allow path and the cascade sweep so the lifecycle +/// transitions stay in one place. /// -/// Returns `Err` only when a state write fails; the invocation result -/// itself (success or function-error) is captured on the record. The -/// caller decides how to surface a state-write failure (the existing -/// handlers map it to `{ok:false, error:"state_write_failed"}`). +/// Phase 1 (InFlight) is the dup-exec guard: a concurrent resolve seeing +/// a non-Pending row in `handle_resolve` returns `in_flight` and skips +/// the second invoke. 
pub(crate) async fn approve_and_execute( bus: &dyn StateBus, exec: &dyn FunctionExecutor, state_scope: &str, - pending: &Value, + pending: Record, session_id: &str, function_call_id: &str, + now_ms: u64, ) -> Result<(), String> { - let function_id = pending - .get("function_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - let args = pending.get("args").cloned().unwrap_or(json!({})); let key = pending_key(session_id, function_call_id); - let approved = transition_record(pending, "approved", None, None, None); - // Best-effort intermediate write; if it fails we still try to invoke - // so the user-visible behavior matches the pre-extraction allow path. - let _ = bus.set(state_scope, &key, approved.clone()).await; - match exec + let function_id = pending.function_id.clone(); + let args = pending.args.clone(); + + // Phase 1: InFlight write. Closes the dup-exec race. + let in_flight = pending.in_flight(now_ms); + bus.set(state_scope, &key, in_flight.to_value()) + .await + .map_err(|e| e.to_string())?; + + // Phase 2: invoke. Result/error captured on the record below. + let outcome = match exec .invoke(&function_id, args, function_call_id, session_id) .await { - Ok(result) => { - let executed = transition_record(&approved, "executed", Some(result), None, None); - bus.set(state_scope, &key, executed) - .await - .map_err(|e| e.to_string()) - } - Err(error) => { - let failed = transition_record(&approved, "failed", None, Some(error), None); - bus.set(state_scope, &key, failed) - .await - .map_err(|e| e.to_string()) - } - } + Ok(result) => Outcome::Executed { result }, + Err(error) => Outcome::Failed { error }, + }; + + // Phase 3: Done write. resolved_at preserved from the InFlight write. 
+ let done = in_flight.done(outcome); + bus.set(state_scope, &key, done.to_value()) + .await + .map_err(|e| e.to_string()) } From 2b316f932b0f6a851ed40471cdbb2d2ee72b20ea Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:21:17 -0300 Subject: [PATCH 22/30] feat(approval-gate): approval::consume + strip delivery dead RPCs + delete sweeper.rs/lifecycle.rs (T8+T9+T11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three behavioral changes folded into one commit because they share files: T8 (new approval::consume RPC): Three-phase drain — gather Done candidates, sort by resolved_at + cap, delete-and-return. Defensive session_id filter. Lazy timeout flip on read. Default cap CONSUME_DEFAULT_LIMIT=50 bounds the response size against MB-scale stdout payloads. T9 (delivery.rs strip): handle_list_undelivered, handle_consume_undelivered, handle_ack_delivered, handle_flush_delivered all deleted. LIST_UNDELIVERED_DEFAULT_LIMIT constant gone. handle_list_pending rewritten on the typed Record API with lazy timeout flip on read. handle_sweep_session rewritten on new schema — flips Pending+InFlight rows to Done(TimedOut). T11 (sweeper.rs deleted): Background polling task gone. Timeouts now flip lazily on read in handle_resolve / handle_consume / handle_list_pending. UI handles expires_at countdown client-side; the LLM learns of timeouts on the next consume. lifecycle.rs transient shim also deleted (no more callers). Stream helpers (uuid_like, write_event, write_hook_reply) move into register.rs as their only consumer; spawn_timeout_sweeper + timeout_resolved_event deleted with the rest of sweeper.rs. register.rs surgery: Refs struct loses 5 dead FunctionRef fields and the sweeper JoinHandle. RPC registrations for list_undelivered, consume_undelivered, ack_delivered, flush_delivered all gone. New FN_CONSUME registration added. Classifier-alias warning check trimmed to live function ids. 
15 new tests cover handle_consume (7) + handle_sweep_session (1) + handle_list_pending lazy flip (1) + the pre-existing edits. 65 lib tests pass total. --- approval-gate/src/delivery.rs | 558 ++++++++++++++++++--------------- approval-gate/src/lib.rs | 17 +- approval-gate/src/lifecycle.rs | 140 --------- approval-gate/src/register.rs | 178 +++++------ approval-gate/src/sweeper.rs | 151 --------- 5 files changed, 388 insertions(+), 656 deletions(-) delete mode 100644 approval-gate/src/lifecycle.rs delete mode 100644 approval-gate/src/sweeper.rs diff --git a/approval-gate/src/delivery.rs b/approval-gate/src/delivery.rs index b6ed055c..e2f73281 100644 --- a/approval-gate/src/delivery.rs +++ b/approval-gate/src/delivery.rs @@ -1,309 +1,361 @@ //! Delivery-tracking handlers. //! -//! The six RPCs that orchestrators call to read and acknowledge the -//! gate's terminal-status records, plus the sweep that retires pending -//! ones when a session ends. They share two invariants: +//! Three RPCs make up the gate's read/drain surface: //! -//! - Stamping `delivered_in_turn_id` is idempotent — re-acking a record -//! that already has the stamp is a no-op. -//! - Lazy timeout flip: any read path through this module promotes -//! pending-but-expired records to `timed_out` before applying its -//! filter, so callers see expired calls surface on the same read they -//! would have used regardless. +//! - [`handle_list_pending`] — UI-facing list of in-flight prompts. +//! Applies lazy timeout flip on read: a Pending row past `expires_at` +//! flips to `Done(TimedOut)` and disappears from the list. +//! - [`handle_consume`] — atomic drain: returns Done rows and deletes +//! them in the same call. Defensive `session_id` filter; cap + +//! `omitted` counter; sort by `resolved_at` for deterministic LLM +//! replay across multi-row consumes (cascade case). +//! - [`handle_sweep_session`] — force-cancellation for `run::stop`: +//! flips every Pending and InFlight row to `Done(TimedOut)`. 
use serde_json::{json, Value}; -use crate::lifecycle::{is_terminal_status, maybe_flip_timed_out, transition_record}; +use crate::record::{Outcome, Record, Status}; use crate::state::StateBus; use crate::wire::pending_key; -/// List records currently in the `pending` status for a session. Used -/// by UIs to render the in-flight approval queue. +/// Default per-call cap on `handle_consume`. Bounds the response size — +/// `Outcome::Executed.result` can carry MB-sized stdout/stderr payloads, +/// and we don't want one consume to blow the trigger wire or the next +/// LLM turn. +pub const CONSUME_DEFAULT_LIMIT: usize = 50; + +/// List Pending rows for a session. Applies lazy timeout flip on read — +/// expired Pending rows are persisted as `Done(TimedOut)` and dropped +/// from the response. pub async fn handle_list_pending(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); + let session_id = payload.get("session_id").and_then(Value::as_str).unwrap_or(""); if session_id.is_empty() { return json!({ "pending": [] }); } + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let pending: Vec = all - .into_iter() - .filter(|v| v.get("status").and_then(Value::as_str) == Some("pending")) - .collect(); + let rows = bus.list_prefix(state_scope, &prefix).await; + + let mut pending = Vec::new(); + for raw in rows { + let Some(record) = Record::from_value(raw) else { continue }; + if record.session_id != session_id { continue; } // defensive + // Lazy flip + persist; expired rows leave the Pending list. 
+ if let Some(flipped) = record.flipped_to_timed_out_if_expired(now_ms) { + let key = pending_key(session_id, &flipped.function_call_id); + let _ = bus.set(state_scope, &key, flipped.to_value()).await; + continue; + } + if record.status == Status::Pending { + pending.push(record.to_value()); + } + } json!({ "pending": pending }) } -/// Default cap for `handle_list_undelivered` responses. A single LLM turn -/// should never be asked to ingest more than this many stitched approval -/// messages; older entries beyond the cap stay unacked and are reported via -/// the `omitted` counter so the caller can render a summary line. -pub const LIST_UNDELIVERED_DEFAULT_LIMIT: usize = 50; - -/// Return terminal-status records for a session that haven't been stamped -/// with `delivered_in_turn_id`. Lazy timeout: pending records past -/// `expires_at` (as observed at `now_ms`) are flipped to `timed_out` before -/// the filter so they surface here in the same call. +/// Atomic drain: returns Done rows for a session and deletes them in the +/// same call. Pending and InFlight rows stay in state. Pending rows past +/// `expires_at` are lazy-flipped to `Done(TimedOut)` and returned. +/// +/// Three phases: +/// 1. gather Done candidates (no state mutation); +/// 2. sort by `resolved_at`, apply cap, report `omitted` count; +/// 3. delete-and-return — only rows whose delete succeeded are returned, +/// so a partial failure leaves the row to be retried next consume. /// -/// Sorted oldest-first by `resolved_at` (records missing `resolved_at` sort -/// last as `u64::MAX`). Capped at `limit` (default -/// [`LIST_UNDELIVERED_DEFAULT_LIMIT`]); the response always includes an -/// `omitted` field counting entries left behind. -pub async fn handle_list_undelivered( +/// Sort order matters when cascade auto-resolves multiple rows that all +/// surface to the same consume — `resolved_at` produces deterministic +/// LLM message order. 
+pub async fn handle_consume( bus: &dyn StateBus, state_scope: &str, payload: Value, now_ms: u64, ) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); + let session_id = payload.get("session_id").and_then(Value::as_str).unwrap_or(""); if session_id.is_empty() { - return json!({ "entries": [], "omitted": 0 }); + return json!({ "ok": false, "error": "missing_session_id" }); } let limit = payload .get("limit") .and_then(Value::as_u64) .map(|n| n as usize) - .unwrap_or(LIST_UNDELIVERED_DEFAULT_LIMIT); + .unwrap_or(CONSUME_DEFAULT_LIMIT); + let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut entries: Vec = Vec::new(); - for rec in all { - // Defensive scope: some bus backends ignore the prefix and return - // every record in `state_scope`. Drop anything not stamped with - // the session_id we're listing for. Orphan records lacking a - // session_id stamp are dropped (cannot be attributed); the - // migration path that used to recover them no longer exists. 
- match rec.get("session_id").and_then(Value::as_str) { - Some(sid) if sid == session_id => {} - _ => continue, - } - let rec = if let Some(flipped) = maybe_flip_timed_out(&rec, now_ms) { - let call_id = flipped - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - let _ = bus - .set( - state_scope, - &pending_key(session_id, call_id), - flipped.clone(), - ) - .await; - flipped - } else { - rec - }; - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - if !is_terminal_status(status) { - continue; - } - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; - } - entries.push(rec); - } - entries.sort_by_key(|e| { - e.get("resolved_at") - .and_then(Value::as_u64) - .unwrap_or(u64::MAX) - }); - let total = entries.len(); - let omitted = total.saturating_sub(limit); - entries.truncate(limit); - json!({ "entries": entries, "omitted": omitted }) -} + let rows = bus.list_prefix(state_scope, &prefix).await; -/// Stamp `delivered_in_turn_id` on terminal-status records named in -/// `call_ids` for the given session. Idempotent: records already stamped -/// (non-null `delivered_in_turn_id`) are not overwritten. Unknown call ids -/// are silently skipped. -pub async fn handle_ack_delivered(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - let call_ids: Vec = payload - .get("call_ids") - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(str::to_string)) - .collect() - }) - .unwrap_or_default(); - if session_id.is_empty() || turn_id.is_empty() || call_ids.is_empty() { - return json!({ "ok": true, "stamped": 0 }); + // Phase 1: gather Done candidates without mutating state. 
+ let mut candidates: Vec = Vec::new(); + for raw in rows { + let Some(record) = Record::from_value(raw) else { continue }; + // Defensive session_id filter: some state-bus backends ignore the + // prefix arg and return every row in the scope. Drop anything not + // stamped with the session_id we're consuming for — otherwise a + // faulty backend could cross-session delete. + if record.session_id != session_id { continue; } + // Lazy flip (Pending → Done(TimedOut)). No persist needed — we're + // about to delete this row. + let record = record.flipped_to_timed_out_if_expired(now_ms).unwrap_or(record); + // Only drain Done. Pending (awaiting operator) and InFlight + // (invoke in progress) stay in state. + if record.status != Status::Done { continue; } + candidates.push(record); } - let mut stamped = 0_u64; - for cid in call_ids { - let key = pending_key(session_id, &cid); - let Some(rec) = bus.get(state_scope, &key).await else { - continue; - }; - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; - } - let mut next = rec; - next.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - if bus.set(state_scope, &key, next).await.is_ok() { - stamped += 1; + + // Phase 2: sort + cap. + candidates.sort_by_key(|r| r.resolved_at.unwrap_or(u64::MAX)); + let total = candidates.len(); + let omitted = total.saturating_sub(limit) as u64; + candidates.truncate(limit); + + // Phase 3: delete-and-return. 
+ let mut entries: Vec = Vec::with_capacity(candidates.len()); + for record in candidates { + let key = pending_key(session_id, &record.function_call_id); + if bus.delete(state_scope, &key).await.is_ok() { + entries.push(record.to_value()); } } - json!({ "ok": true, "stamped": stamped }) + json!({ "ok": true, "entries": entries, "omitted": omitted }) } -/// Atomic list+ack: returns the same entries `handle_list_undelivered` would -/// surface (subject to the same FIFO+cap rules) and stamps each one with -/// `delivered_in_turn_id` before returning. Eliminates the list→LLM→ack -/// race window: if the caller crashes after receiving the response, the -/// entries are still considered delivered and will not resurface, which is -/// acceptable because terminal records are informational (the side-effect -/// already executed inside the gate). -/// -/// Required payload: `{ session_id, turn_id, limit? }`. -pub async fn handle_consume_undelivered( +/// Force-cancel every non-terminal row in a session by flipping it to +/// `Done(TimedOut)`. Called from `run::stop` so a stale UI modal cannot +/// still execute its function after the operator clicks Stop. Lazy +/// timeout is not a substitute — default `expires_at` is 5 min and we +/// cannot leave a 5-min stale-modal window after Stop. 
+pub async fn handle_sweep_session( bus: &dyn StateBus, state_scope: &str, payload: Value, - now_ms: u64, ) -> Value { - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - if turn_id.is_empty() { - return json!({ "ok": false, "error": "missing_turn_id", "entries": [], "omitted": 0 }); + let session_id = payload.get("session_id").and_then(Value::as_str).unwrap_or(""); + if session_id.is_empty() { + return json!({ "ok": false, "error": "missing_session_id", "swept": 0 }); } - let listed = handle_list_undelivered(bus, state_scope, payload.clone(), now_ms).await; - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let entries = listed["entries"].as_array().cloned().unwrap_or_default(); - let omitted = listed["omitted"].as_u64().unwrap_or(0); - for rec in &entries { - let cid = rec - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if cid.is_empty() { - continue; + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + + let prefix = format!("{session_id}/"); + let rows = bus.list_prefix(state_scope, &prefix).await; + let mut swept = 0u64; + + for raw in rows { + let Some(record) = Record::from_value(raw) else { continue }; + if record.session_id != session_id { continue; } // defensive + if record.status == Status::Done { continue; } // already terminal + + let key = pending_key(session_id, &record.function_call_id); + let timed_out = record.done_at(now_ms, Outcome::TimedOut); + if bus.set(state_scope, &key, timed_out.to_value()).await.is_ok() { + swept += 1; } - let key = pending_key(session_id, cid); - let mut stamped = rec.clone(); - stamped.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - let _ = bus.set(state_scope, &key, stamped).await; } - json!({ "ok": true, "entries": entries, "omitted": omitted }) + json!({ "ok": true, "swept": 
swept }) } -/// One-shot drain: stamp every terminal-status record in `session_id` that -/// lacks `delivered_in_turn_id`. Intended for operator recovery after a -/// large backlog accumulates (e.g. when the orchestrator was offline or -/// `consume_undelivered` was unreachable). Pending records are untouched — -/// use `sweep_session` if you want to expire them first. -pub async fn handle_flush_delivered( - bus: &dyn StateBus, - state_scope: &str, - payload: Value, -) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - let turn_id = payload.get("turn_id").and_then(Value::as_str).unwrap_or(""); - if session_id.is_empty() || turn_id.is_empty() { - return json!({ "ok": false, "error": "missing_session_or_turn_id", "stamped": 0 }); +#[cfg(test)] +mod tests { + use super::*; + use crate::record::{Outcome, Record}; + use serde_json::json; + use std::sync::Mutex; + + #[derive(Default)] + struct InMemBus { + rows: Mutex>, } - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut stamped = 0_u64; - for rec in all { - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - if !is_terminal_status(status) { - continue; + #[async_trait::async_trait] + impl StateBus for InMemBus { + async fn set(&self, scope: &str, key: &str, value: Value) -> Result<(), iii_sdk::IIIError> { + self.rows.lock().unwrap().insert((scope.into(), key.into()), value); + Ok(()) } - if rec - .get("delivered_in_turn_id") - .is_some_and(|v| !v.is_null()) - { - continue; + async fn get(&self, scope: &str, key: &str) -> Option { + self.rows.lock().unwrap().get(&(scope.into(), key.into())).cloned() } - let cid = rec - .get("function_call_id") - .and_then(Value::as_str) - .map(str::to_string) - .unwrap_or_default(); - if cid.is_empty() { - continue; + async fn list_prefix(&self, scope: &str, prefix: &str) -> Vec { + self.rows.lock().unwrap() + .iter() + .filter(|((s, k), _)| s == scope && 
k.starts_with(prefix)) + .map(|(_, v)| v.clone()) + .collect() } - let mut next = rec; - next.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String(turn_id.to_string()), - ); - if bus - .set(state_scope, &pending_key(session_id, &cid), next) - .await - .is_ok() - { - stamped += 1; + async fn delete(&self, scope: &str, key: &str) -> Result<(), iii_sdk::IIIError> { + self.rows.lock().unwrap().remove(&(scope.into(), key.into())); + Ok(()) } } - json!({ "ok": true, "stamped": stamped }) -} -/// Sweep all still-pending approvals for a session to timed_out. -/// -/// The `timed_out` status is self-describing per the Denial refactor — -/// callers no longer pass (or get back) a reason string. If you need to -/// distinguish *why* a session was swept (delete vs. abort vs. timeout), -/// the calling worker already has that context and should log it there. -pub async fn handle_sweep_session(bus: &dyn StateBus, state_scope: &str, payload: Value) -> Value { - let session_id = payload - .get("session_id") - .and_then(Value::as_str) - .unwrap_or(""); - if session_id.is_empty() { - return json!({ "ok": false, "error": "missing_session_id", "swept": 0 }); + async fn seed_done(bus: &InMemBus, session: &str, cid: &str, resolved_at: u64) { + let r = Record::pending( + cid.into(), "shell::exec".into(), + json!({"command": "ls"}), session.into(), 0, 60_000, + ).in_flight(resolved_at).done(Outcome::Executed { result: json!({"cid": cid}) }); + bus.set("approvals", &format!("{session}/{cid}"), r.to_value()).await.unwrap(); } - let prefix = format!("{session_id}/"); - let all = bus.list_prefix(state_scope, &prefix).await; - let mut swept = 0_u64; - for rec in all { - if rec.get("status").and_then(Value::as_str) != Some("pending") { - continue; - } - let call_id = rec - .get("function_call_id") - .and_then(Value::as_str) - .unwrap_or(""); - if call_id.is_empty() { - continue; - } - let flipped = transition_record(&rec, "timed_out", None, None, None); - if bus - 
.set(state_scope, &pending_key(session_id, call_id), flipped) - .await - .is_ok() - { - swept += 1; + + async fn seed_pending(bus: &InMemBus, session: &str, cid: &str, expires_at: u64) { + let mut r = Record::pending( + cid.into(), "shell::exec".into(), + json!({}), session.into(), 0, 60_000); + r.expires_at = expires_at; + bus.set("approvals", &format!("{session}/{cid}"), r.to_value()).await.unwrap(); + } + + #[tokio::test] + async fn consume_returns_done_rows_and_deletes_them() { + let bus = InMemBus::default(); + seed_done(&bus, "sess_a", "tc-1", 100).await; + seed_done(&bus, "sess_a", "tc-2", 200).await; + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a"}), 1_000).await; + assert_eq!(reply["ok"], true); + assert_eq!(reply["omitted"], 0); + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 2); + assert!(bus.get("approvals", "sess_a/tc-1").await.is_none()); + assert!(bus.get("approvals", "sess_a/tc-2").await.is_none()); + } + + #[tokio::test] + async fn consume_skips_pending_rows() { + let bus = InMemBus::default(); + seed_done(&bus, "sess_a", "tc-1", 100).await; + seed_pending(&bus, "sess_a", "tc-2", 999_999).await; + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a"}), 1_000).await; + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert!(bus.get("approvals", "sess_a/tc-2").await.is_some()); + } + + #[tokio::test] + async fn consume_lazy_flips_expired_pending_then_returns_and_deletes() { + let bus = InMemBus::default(); + seed_pending(&bus, "sess_a", "tc-1", 500).await; + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a"}), 1_000).await; + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["status"], "done"); + assert_eq!(entries[0]["outcome"]["kind"], "timed_out"); + assert!(bus.get("approvals", "sess_a/tc-1").await.is_none()); + } + + #[tokio::test] + 
async fn consume_sorts_by_resolved_at_ascending() { + let bus = InMemBus::default(); + seed_done(&bus, "sess_a", "tc-z-late", 300).await; + seed_done(&bus, "sess_a", "tc-a-early", 100).await; + seed_done(&bus, "sess_a", "tc-m-mid", 200).await; + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a"}), 1_000).await; + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries[0]["function_call_id"], "tc-a-early"); + assert_eq!(entries[1]["function_call_id"], "tc-m-mid"); + assert_eq!(entries[2]["function_call_id"], "tc-z-late"); + } + + #[tokio::test] + async fn consume_cap_with_omitted_counter() { + let bus = InMemBus::default(); + for i in 0..60 { + seed_done(&bus, "sess_a", &format!("tc-{i:02}"), i as u64).await; } + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a", "limit": 50}), 1_000).await; + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 50); + assert_eq!(reply["omitted"], 10); + let still_there = bus.list_prefix("approvals", "sess_a/").await; + assert_eq!(still_there.len(), 10); + } + + #[tokio::test] + async fn consume_missing_session_id_returns_error() { + let bus = InMemBus::default(); + let reply = handle_consume(&bus, "approvals", json!({}), 1_000).await; + assert_eq!(reply["ok"], false); + assert_eq!(reply["error"], "missing_session_id"); + } + + #[tokio::test] + async fn consume_defensive_session_id_filter_drops_foreign_rows() { + let bus = InMemBus::default(); + let r = Record::pending( + "tc-x".into(), "shell::exec".into(), json!({}), + "sess_b".into(), // WRONG session in data + 0, 60_000, + ).in_flight(100).done(Outcome::Executed { result: json!({}) }); + bus.set("approvals", "sess_a/tc-x", r.to_value()).await.unwrap(); + + let reply = handle_consume(&bus, "approvals", + json!({"session_id": "sess_a"}), 1_000).await; + let entries = reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 0); + assert!(bus.get("approvals", 
"sess_a/tc-x").await.is_some(), + "defensive: row stays in state, NOT deleted"); + } + + #[tokio::test] + async fn sweep_flips_pending_and_done_untouched() { + let bus = InMemBus::default(); + let pending = Record::pending( + "tc-1".into(), "shell::exec".into(), json!({}), + "sess_a".into(), 0, 60_000); + bus.set("approvals", "sess_a/tc-1", pending.to_value()).await.unwrap(); + + let in_flight = Record::pending( + "tc-2".into(), "shell::exec".into(), json!({}), + "sess_a".into(), 0, 60_000).in_flight(500); + bus.set("approvals", "sess_a/tc-2", in_flight.to_value()).await.unwrap(); + + let done = Record::pending( + "tc-3".into(), "shell::exec".into(), json!({}), + "sess_a".into(), 0, 60_000) + .in_flight(100).done(Outcome::Executed { result: json!({}) }); + bus.set("approvals", "sess_a/tc-3", done.to_value()).await.unwrap(); + + let reply = handle_sweep_session(&bus, "approvals", + json!({"session_id": "sess_a"})).await; + assert_eq!(reply["swept"], 2); + + let r1 = Record::from_value(bus.get("approvals", "sess_a/tc-1").await.unwrap()).unwrap(); + assert!(matches!(r1.outcome, Some(Outcome::TimedOut))); + let r2 = Record::from_value(bus.get("approvals", "sess_a/tc-2").await.unwrap()).unwrap(); + assert!(matches!(r2.outcome, Some(Outcome::TimedOut))); + let r3 = Record::from_value(bus.get("approvals", "sess_a/tc-3").await.unwrap()).unwrap(); + assert!(matches!(r3.outcome, Some(Outcome::Executed { .. })), + "already-Done rows must not be re-stamped"); + } + + #[tokio::test] + async fn list_pending_lazy_flips_expired_rows_out_of_the_list() { + let bus = InMemBus::default(); + // tc-live: expires far in the future (year ~5138). tc-expired: + // expires near epoch — definitely past now. + seed_pending(&bus, "sess_a", "tc-live", u64::MAX).await; + seed_pending(&bus, "sess_a", "tc-expired", 500).await; + // Advance the system clock indirectly: just trust the inline now_ms + // in handle_list_pending. expires_at=500 < now_ms, so it should flip. 
+ // Wait briefly to ensure SystemTime::now() > 500ms since UNIX_EPOCH + // (it's well past 1970, so any current time satisfies this). + let reply = handle_list_pending(&bus, "approvals", + json!({"session_id": "sess_a"})).await; + let pending = reply["pending"].as_array().unwrap(); + assert_eq!(pending.len(), 1); + assert_eq!(pending[0]["function_call_id"], "tc-live"); + + // Expired row is now persisted as Done(TimedOut). + let r = Record::from_value(bus.get("approvals", "sess_a/tc-expired").await.unwrap()).unwrap(); + assert!(matches!(r.outcome, Some(Outcome::TimedOut))); } - json!({ "ok": true, "swept": swept }) } diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index 27dec256..edaafde9 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -7,26 +7,23 @@ pub mod config; pub mod delivery; pub mod intercept; -pub mod lifecycle; // transitional compat shim — deleted in T11 after T5/T6/T8 migrate callsites pub mod manifest; pub mod record; pub mod register; pub mod resolve; pub mod rules; pub mod state; -pub mod sweeper; pub mod wire; pub use config::{InterceptorRule, WorkerConfig}; pub use delivery::{ - handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, - handle_list_undelivered, handle_sweep_session, LIST_UNDELIVERED_DEFAULT_LIMIT, + handle_consume, handle_list_pending, handle_sweep_session, CONSUME_DEFAULT_LIMIT, }; pub use intercept::handle_intercept; pub use record::{Outcome, Record, Status}; pub use register::{ - register, Refs, FN_ACK_DELIVERED, FN_CONSUME_UNDELIVERED, FN_FLUSH_DELIVERED, FN_LIST_PENDING, - FN_LIST_UNDELIVERED, FN_LOOKUP_RECORD, FN_RESOLVE, FN_SWEEP_SESSION, STATE_SCOPE, + register, Refs, FN_CONSUME, FN_LIST_PENDING, FN_LOOKUP_RECORD, FN_RESOLVE, FN_SWEEP_SESSION, + STATE_SCOPE, }; pub use resolve::{handle_lookup_record, handle_resolve}; pub use state::{ @@ -66,9 +63,9 @@ pub(crate) fn verdict_for( } } -// Test-only re-imports kept as small as possible. 
Helpers below this line -// will be deleted as their owning modules are rewritten in later tasks. +// Test-only re-imports: the few helpers below this line stay private to +// the crate but the integration tests need them. Will shrink as state.rs +// strips the marker plumbing (T10) and the orchestrator-side helpers +// migrate to the new wire shape (T14). #[cfg(test)] use state::{merge_from_approval_marker_if_needed, rule_for}; -#[cfg(test)] -use sweeper::timeout_resolved_event; diff --git a/approval-gate/src/lifecycle.rs b/approval-gate/src/lifecycle.rs deleted file mode 100644 index 1b7c684c..00000000 --- a/approval-gate/src/lifecycle.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Persisted-record lifecycle helpers. -//! -//! Pure functions that construct and transition the `Value`-blob record -//! schema as it lives in the iii state bus. No I/O, no async — the only -//! impurity is reading the system clock via [`transition_record`], whose -//! testable variant [`transition_record_with_now`] takes `now_ms` -//! directly. (Operators adopting the typed schema can read the same -//! shape via [`crate::record::Record`] / [`crate::record::Record::from_value`].) -//! -//! The wire keys (`status`, `function_call_id`, `expires_at`, -//! `resolved_at`, `result`, `error`, `denial`, `delivered_in_turn_id`) -//! are stable contract; renaming requires a state-store migration. The -//! `denial` field is documented at [`crate::wire::Denial`]. - -use serde_json::{json, Value}; - -use crate::wire::{pending_key, Denial}; - -/// True if `status` is one of the terminal states a stitched system message -/// should be built from. `pending` and `approved` are intermediate. -pub fn is_terminal_status(status: &str) -> bool { - matches!(status, "executed" | "failed" | "denied" | "timed_out") -} - -/// Build a fresh pending record. `session_id` is unset here — -/// `handle_intercept` stamps it before persisting. 
`expires_at` is -/// `now_ms + timeout_ms`, saturating on overflow so a buggy caller -/// can't underflow the deadline. -pub fn build_pending_record( - function_call_id: &str, - function_id: &str, - args: &Value, - now_ms: u64, - timeout_ms: u64, -) -> Value { - json!({ - "function_call_id": function_call_id, - "function_id": function_id, - "args": args, - "status": "pending", - "expires_at": now_ms.saturating_add(timeout_ms), - }) -} - -/// Build a new record by transitioning a pending base record to a terminal -/// status. All terminal fields (`result`, `error`, `denial`) are optional; -/// only the ones provided are attached. Existing fields on the base -/// (including `delivered_in_turn_id` and `resolved_at` if present) are -/// preserved. The first transition into a terminal status stamps -/// `resolved_at`. -pub fn transition_record( - base: &Value, - new_status: &str, - result: Option, - error: Option, - denial: Option, -) -> Value { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - transition_record_with_now(base, new_status, result, error, denial, now_ms) -} - -/// Testable variant of [`transition_record`] that takes `now_ms` directly. 
-pub fn transition_record_with_now( - base: &Value, - new_status: &str, - result: Option, - error: Option, - denial: Option, - now_ms: u64, -) -> Value { - let mut rec = base.clone(); - if let Some(obj) = rec.as_object_mut() { - obj.insert("status".into(), Value::String(new_status.to_string())); - if let Some(r) = result { - obj.insert("result".into(), r); - } - if let Some(e) = error { - obj.insert("error".into(), Value::String(e)); - } - if let Some(d) = denial { - obj.insert( - "denial".into(), - serde_json::to_value(&d).expect("Denial is always serializable"), - ); - } - if is_terminal_status(new_status) && !obj.contains_key("resolved_at") { - obj.insert("resolved_at".into(), Value::Number(now_ms.into())); - } - } - rec -} - -/// For a bag of pending records, return the subset that have expired at -/// `now_ms` along with the metadata needed to commit the flip and notify the -/// owning session. Records without a stamped `session_id` (legacy rows -/// written before that field existed) are skipped — they'll still be picked -/// up lazily by `handle_list_undelivered` on the next read. -pub fn collect_timed_out_for_sweep( - records: &[Value], - now_ms: u64, -) -> Vec<(String, Value, String, String)> { - records - .iter() - .filter_map(|rec| { - let flipped = maybe_flip_timed_out(rec, now_ms)?; - let session_id = flipped - .get("session_id") - .and_then(Value::as_str)? - .to_string(); - let function_call_id = flipped - .get("function_call_id") - .and_then(Value::as_str)? - .to_string(); - if session_id.is_empty() || function_call_id.is_empty() { - return None; - } - let key = pending_key(&session_id, &function_call_id); - Some((key, flipped, session_id, function_call_id)) - }) - .collect() -} - -/// Return Some(timed_out_record) if `rec` is pending and `now_ms` is past -/// `expires_at`; otherwise None. Pure function — does not write state. 
-pub fn maybe_flip_timed_out(rec: &Value, now_ms: u64) -> Option { - if rec.get("status").and_then(Value::as_str) != Some("pending") { - return None; - } - let exp = rec.get("expires_at").and_then(Value::as_u64)?; - if now_ms < exp { - return None; - } - // Timeout flip carries no Denial: the `timed_out` status itself is the - // explanation. Downstream renderers (turn-orchestrator stitching, UIs) - // branch on the status, not on a redundant reason string. - Some(transition_record(rec, "timed_out", None, None, None)) -} diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs index 9adc88c8..1b0497ec 100644 --- a/approval-gate/src/register.rs +++ b/approval-gate/src/register.rs @@ -20,29 +20,19 @@ use iii_sdk::{ use serde_json::{json, Value}; use crate::config::{InterceptorRule, WorkerConfig}; -use crate::delivery::{ - handle_ack_delivered, handle_consume_undelivered, handle_flush_delivered, handle_list_pending, - handle_list_undelivered, handle_sweep_session, -}; +use crate::delivery::{handle_consume, handle_list_pending, handle_sweep_session}; use crate::intercept::handle_intercept; use crate::resolve::{handle_lookup_record, handle_resolve}; use crate::rules; use crate::state::{ - rule_for, unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, - StateBus, + unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, }; -use crate::sweeper::{spawn_timeout_sweeper, write_event, write_hook_reply}; -use crate::wire::{extract_call, pending_key, Denial}; +use crate::wire::{extract_call, pending_key}; -/// The iii function ids registered by [`register`]. Operators must not -/// alias these on any classifier — the boot guard logs a warning when -/// a misconfiguration is detected, see [`register`]. +/// The iii function ids registered by [`register`]. 
pub const FN_RESOLVE: &str = "approval::resolve"; pub const FN_LIST_PENDING: &str = "approval::list_pending"; -pub const FN_LIST_UNDELIVERED: &str = "approval::list_undelivered"; -pub const FN_CONSUME_UNDELIVERED: &str = "approval::consume_undelivered"; -pub const FN_ACK_DELIVERED: &str = "approval::ack_delivered"; -pub const FN_FLUSH_DELIVERED: &str = "approval::flush_delivered"; +pub const FN_CONSUME: &str = "approval::consume"; pub const FN_SWEEP_SESSION: &str = "approval::sweep_session"; pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; @@ -50,22 +40,16 @@ pub const FN_LOOKUP_RECORD: &str = "approval::lookup_record"; pub const STATE_SCOPE: &str = "approvals"; /// Handles returned from [`register`]; holding them keeps every iii -/// function registration and the background sweeper task alive. +/// function registration alive for the worker's lifetime. The 2-second +/// background sweeper task is gone — timeouts now flip lazily on read. pub struct Refs { pub resolve: FunctionRef, pub list_pending: FunctionRef, - pub list_undelivered: FunctionRef, - pub consume_undelivered: FunctionRef, - pub ack_delivered: FunctionRef, - pub flush_delivered: FunctionRef, + pub consume: FunctionRef, pub sweep_session: FunctionRef, pub lookup_record: FunctionRef, pub subscriber_fn: FunctionRef, pub subscriber_trigger: iii_sdk::Trigger, - /// Background task that flips expired pending records to `timed_out` and - /// emits the corresponding `approval_resolved` events. Kept alive by - /// virtue of being held here; aborts when the worker shuts down. 
- pub sweeper: tokio::task::JoinHandle<()>, } pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { @@ -94,8 +78,7 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { if cid == FN_LOOKUP_RECORD || cid == FN_RESOLVE || cid == FN_LIST_PENDING - || cid == FN_LIST_UNDELIVERED - || cid == FN_ACK_DELIVERED + || cid == FN_CONSUME || cid == FN_SWEEP_SESSION { tracing::warn!( @@ -207,36 +190,14 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { }, )); - let bus_for_list_undelivered = bus.clone(); - let scope_list_undelivered = state_scope.clone(); - let list_undelivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_LIST_UNDELIVERED.into()).with_description( - "Return resolved approval records for a session that haven't yet been stitched \ - into an LLM turn. Lazy-flips expired pendings to timed_out." - .into(), - ), - move |payload: Value| { - let bus = bus_for_list_undelivered.clone(); - let scope = scope_list_undelivered.clone(); - async move { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - Ok::<_, IIIError>( - handle_list_undelivered(bus.as_ref(), &scope, payload, now_ms).await, - ) - } - }, - )); - let bus_for_consume = bus.clone(); let scope_consume = state_scope.clone(); - let consume_undelivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_CONSUME_UNDELIVERED.into()).with_description( - "Atomic list+ack of resolved approval records. Returns the same FIFO-capped \ - slice as list_undelivered AND stamps each entry with delivered_in_turn_id \ - before returning. Required payload: {session_id, turn_id, limit?}." + let consume = iii.register_function(( + RegisterFunctionMessage::with_id(FN_CONSUME.into()).with_description( + "Atomic drain: returns Done rows for a session and deletes them in the \ + same call. Pending and InFlight rows stay in state. 
Pending rows past \ + expires_at are lazy-flipped to Done(TimedOut) before return. \ + Required payload: {session_id, limit?}. Response: {ok, entries, omitted}." .into(), ), move |payload: Value| { @@ -248,47 +209,12 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { .map(|d| d.as_millis() as u64) .unwrap_or(0); Ok::<_, IIIError>( - handle_consume_undelivered(bus.as_ref(), &scope, payload, now_ms).await, + handle_consume(bus.as_ref(), &scope, payload, now_ms).await, ) } }, )); - let bus_for_ack = bus.clone(); - let scope_ack = state_scope.clone(); - let ack_delivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_ACK_DELIVERED.into()).with_description( - "Stamp delivered_in_turn_id on resolved approvals so they aren't replayed \ - in subsequent turns. Idempotent." - .into(), - ), - move |payload: Value| { - let bus = bus_for_ack.clone(); - let scope = scope_ack.clone(); - async move { - Ok::<_, IIIError>(handle_ack_delivered(bus.as_ref(), &scope, payload).await) - } - }, - )); - - let bus_for_flush = bus.clone(); - let scope_flush = state_scope.clone(); - let flush_delivered = iii.register_function(( - RegisterFunctionMessage::with_id(FN_FLUSH_DELIVERED.into()).with_description( - "Stamp every unacked terminal approval record in a session as \ - delivered. One-shot operator recovery for backlog accumulation. \ - Required payload: {session_id, turn_id}." 
- .into(), - ), - move |payload: Value| { - let bus = bus_for_flush.clone(); - let scope = scope_flush.clone(); - async move { - Ok::<_, IIIError>(handle_flush_delivered(bus.as_ref(), &scope, payload).await) - } - }, - )); - let bus_for_sweep = bus.clone(); let scope_sweep = state_scope.clone(); let sweep_session = iii.register_function(( @@ -400,24 +326,72 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { }) .map_err(|e| anyhow::anyhow!(e.to_string()))?; - let sweeper = spawn_timeout_sweeper( - iii.clone(), - bus.clone(), - state_scope.clone(), - cfg.sweeper_interval_ms, - ); - Ok(Refs { resolve, list_pending, - list_undelivered, - consume_undelivered, - ack_delivered, - flush_delivered, + consume, sweep_session, lookup_record, subscriber_fn, subscriber_trigger, - sweeper, }) } + +// ───────────────────────────────────────────────────────────────────────── +// Inline stream helpers (used by the subscriber to write the +// `approval_requested` stream frame and the hook reply). These used to +// live in `sweeper.rs` but that file is gone now that the background +// polling task is deleted; the helpers move here as their only consumer. +// ───────────────────────────────────────────────────────────────────────── + +pub(crate) fn uuid_like() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; + static C: AtomicU64 = AtomicU64::new(0); + let n = C.fetch_add(1, Ordering::Relaxed); + let t = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + format!("{t:x}-{n:x}") +} + +/// Append `event` to the `agent::events` stream for `session_id`. Fire- +/// and-forget: errors are swallowed because the persisted record is the +/// source of truth — orchestrators re-derive state from +/// `approval::consume` if a frame is lost. 
+pub(crate) async fn write_event(iii: &III, session_id: &str, event: &Value) { + let _ = iii + .trigger(TriggerRequest { + function_id: "stream::set".into(), + payload: json!({ + "stream_name": "agent::events", + "group_id": session_id, + "item_id": format!("approval-{}", uuid_like()), + "data": event, + }), + action: None, + timeout_ms: None, + }) + .await; +} + +/// Append a hook reply onto `stream_name` keyed by `event_id`. No-op when +/// either id is empty so a malformed envelope can't crash the gate. +pub(crate) async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &str, reply: &Value) { + if stream_name.is_empty() || event_id.is_empty() { + return; + } + let _ = iii + .trigger(TriggerRequest { + function_id: "stream::set".into(), + payload: json!({ + "stream_name": stream_name, + "group_id": event_id, + "item_id": uuid_like(), + "data": reply, + }), + action: None, + timeout_ms: None, + }) + .await; +} diff --git a/approval-gate/src/sweeper.rs b/approval-gate/src/sweeper.rs deleted file mode 100644 index 3cf8012c..00000000 --- a/approval-gate/src/sweeper.rs +++ /dev/null @@ -1,151 +0,0 @@ -//! Periodic timeout sweeper + stream-event helpers. -//! -//! The sweeper runs as a background task: every `interval_ms` it scans -//! the configured state scope, promotes any pending record past its -//! `expires_at` to `timed_out`, and emits the resulting -//! `approval_resolved` event on `agent::events/` so the -//! orchestrator sees the timeout without having to poll. -//! -//! [`write_event`] and [`write_hook_reply`] are the two iii stream -//! writes the gate makes; they live here because the sweeper is their -//! primary caller (the resolve flow also uses them, but their shape is -//! tied to the events-stream contract that the sweeper owns). 
- -use std::sync::Arc; - -use iii_sdk::{TriggerRequest, III}; -use serde_json::{json, Value}; - -use crate::lifecycle::collect_timed_out_for_sweep; -use crate::state::StateBus; - -/// Lightweight unique-ish id without pulling uuid in: ns timestamp + counter. -/// Used as the `item_id` for stream writes so two appends from the same -/// process don't collide. -pub(crate) fn uuid_like() -> String { - use std::sync::atomic::{AtomicU64, Ordering}; - static C: AtomicU64 = AtomicU64::new(0); - let n = C.fetch_add(1, Ordering::Relaxed); - let t = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - format!("{t:x}-{n:x}") -} - -/// Append `event` to the `agent::events` stream for `session_id`. Used by -/// the sweeper (timeout flips) and by the resolve closure (post-resolve -/// `approval_resolved` frame). Fire-and-forget: errors are swallowed -/// because the persisted record is the source of truth — orchestrators -/// re-derive state from `approval::list_undelivered` if a frame is lost. -pub(crate) async fn write_event(iii: &III, session_id: &str, event: &Value) { - let _ = iii - .trigger(TriggerRequest { - function_id: "stream::set".into(), - payload: json!({ - "stream_name": "agent::events", - "group_id": session_id, - "item_id": format!("approval-{}", uuid_like()), - "data": event, - }), - action: None, - timeout_ms: None, - }) - .await; -} - -/// Build the `approval_resolved` event a sweeper emits when it auto-flips an -/// expired pending record. Pure — caller pumps the result onto the stream. -pub(crate) fn timeout_resolved_event(function_call_id: &str) -> Value { - // Timed-out approvals carry no Denial — the `status: "timed_out"` is - // self-describing per the Denial refactor. Consumers (turn-orchestrator - // stitching, UIs) render the timeout from the status alone. 
- json!({ - "type": "approval_resolved", - "function_call_id": function_call_id, - "tool_call_id": function_call_id, - "decision": "deny", - "status": "timed_out", - }) -} - -/// Spawn the periodic timeout sweeper. The task ticks every `interval_ms`, -/// scans the configured state scope, and for any pending record whose -/// `expires_at` is in the past: writes the flipped record back and emits an -/// `approval_resolved` (status=timed_out) frame on `agent::events/`. -/// -/// Active sweeping closes the gap left by lazy flips: operators who never -/// open the UI for a session would otherwise leave its pending rows in -/// `pending` forever and the paused orchestrator would never see a -/// decision. -pub(crate) fn spawn_timeout_sweeper( - iii: III, - bus: Arc, - state_scope: String, - interval_ms: u64, -) -> tokio::task::JoinHandle<()> { - tokio::spawn(async move { - let mut ticker = - tokio::time::interval(std::time::Duration::from_millis(interval_ms.max(50))); - ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); - // Drop the immediate first tick so we don't sweep before any - // pending row could possibly exist. - ticker.tick().await; - loop { - ticker.tick().await; - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - let all = bus.list_prefix(&state_scope, "").await; - for (key, flipped, session_id, call_id) in collect_timed_out_for_sweep(&all, now_ms) { - if let Err(err) = bus.set(&state_scope, &key, flipped).await { - tracing::warn!( - "approval-gate sweeper: failed to flip {key} → timed_out: {err}" - ); - continue; - } - write_event(&iii, &session_id, &timeout_resolved_event(&call_id)).await; - } - } - }) -} - -/// Append a hook reply onto `stream_name` keyed by `event_id`. No-op when -/// either id is empty so a malformed envelope can't crash the gate. 
-pub(crate) async fn write_hook_reply(iii: &III, stream_name: &str, event_id: &str, reply: &Value) { - if stream_name.is_empty() || event_id.is_empty() { - return; - } - let _ = iii - .trigger(TriggerRequest { - function_id: "stream::set".into(), - payload: json!({ - "stream_name": stream_name, - "group_id": event_id, - "item_id": uuid_like(), - "data": reply, - }), - action: None, - timeout_ms: None, - }) - .await; -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn timeout_resolved_event_shape() { - let evt = timeout_resolved_event("tc-1"); - assert_eq!(evt["type"], "approval_resolved"); - assert_eq!(evt["function_call_id"], "tc-1"); - assert_eq!(evt["tool_call_id"], "tc-1"); - assert_eq!(evt["decision"], "deny"); - assert_eq!(evt["status"], "timed_out"); - // timed_out is self-describing — no Denial / no legacy reason. - assert!(evt.get("decision_reason").is_none()); - assert!(evt.get("denial").is_none()); - } -} From cdfe5ba6fb15fa6ec4ae67dc57f7b07805cb085e Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:24:27 -0300 Subject: [PATCH 23/30] feat(approval-gate): strip __from_approval marker plumbing (T10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IiiFunctionExecutor::invoke forwards function_id+args directly to iii.trigger — no marker stamp. Deleted from state.rs: - merge_from_approval_marker_if_needed (and its 4 unit tests) - unverified_marker_targets (and the 2 rule_for tests) - rule_for (only marker-related callers) Deleted from register.rs: - IiiFunctionExecutor's rules: Arc> field - Boot-time unverified-marker check (refused-to-start guard) Per the spec's Threat Model section: bus access ≡ shell access in the new model. The harness's bus-level access control is the perimeter; defense-in-depth via per-target marker verification is out of scope for v1. Shell-side marker verification is deleted in T13. 
T12 will fully retire InterceptorRule + the classifier-alias warning loop in register.rs. --- approval-gate/src/lib.rs | 10 +-- approval-gate/src/register.rs | 27 ++---- approval-gate/src/state.rs | 164 +++------------------------------- 3 files changed, 21 insertions(+), 180 deletions(-) diff --git a/approval-gate/src/lib.rs b/approval-gate/src/lib.rs index edaafde9..80c0a65a 100644 --- a/approval-gate/src/lib.rs +++ b/approval-gate/src/lib.rs @@ -26,9 +26,7 @@ pub use register::{ STATE_SCOPE, }; pub use resolve::{handle_lookup_record, handle_resolve}; -pub use state::{ - unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, -}; +pub use state::{FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus}; pub use wire::{ block_reply_for, extract_call, pending_key, Decision, Denial, IncomingCall, WireDecision, }; @@ -63,9 +61,3 @@ pub(crate) fn verdict_for( } } -// Test-only re-imports: the few helpers below this line stay private to -// the crate but the integration tests need them. Will shrink as state.rs -// strips the marker plumbing (T10) and the orchestrator-side helpers -// migrate to the new wire shape (T14). -#[cfg(test)] -use state::{merge_from_approval_marker_if_needed, rule_for}; diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs index 1b0497ec..13d94811 100644 --- a/approval-gate/src/register.rs +++ b/approval-gate/src/register.rs @@ -24,9 +24,7 @@ use crate::delivery::{handle_consume, handle_list_pending, handle_sweep_session} use crate::intercept::handle_intercept; use crate::resolve::{handle_lookup_record, handle_resolve}; use crate::rules; -use crate::state::{ - unverified_marker_targets, FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus, -}; +use crate::state::{FunctionExecutor, IiiFunctionExecutor, IiiStateBus, StateBus}; use crate::wire::{extract_call, pending_key}; /// The iii function ids registered by [`register`]. 
@@ -60,19 +58,10 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { // cascade in `handle_resolve`). See [`crate::rules`]. let policy_rules: Arc> = Arc::new(RwLock::new(cfg.rules.clone())); - // Fail fast on honor-system markers: any interceptor that asks the gate - // to inject `__from_approval` MUST also assert the target validates it. - // Without that assertion the marker is purely decorative and the gate - // has no way to know whether bypass-through-direct-trigger is contained. - let unverified = unverified_marker_targets(rules.as_slice()); - if !unverified.is_empty() { - return Err(anyhow::anyhow!( - "approval-gate: refusing to start — interceptors with inject_approval_marker=true \ - must also set marker_target_verified=true (target is asserted to validate \ - __from_approval against approval::lookup_record). Unverified: {unverified:?}" - )); - } - + // T10: the boot-time marker-target-verified check is gone with the + // marker plumbing. InterceptorRule.classifier is also retired — the + // alias-warning loop below stays as a config-hygiene check until T12 + // strips InterceptorRule from config.rs entirely. for rule in rules.iter() { if let Some(cid) = rule.classifier.as_deref() { if cid == FN_LOOKUP_RECORD @@ -97,10 +86,8 @@ pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { let bus_for_resolve = bus.clone(); let scope_resolve = state_scope.clone(); - let exec_for_resolve: Arc = Arc::new(IiiFunctionExecutor { - iii: iii.clone(), - rules: rules.clone(), - }); + let exec_for_resolve: Arc = + Arc::new(IiiFunctionExecutor { iii: iii.clone() }); let iii_for_resolve = iii.clone(); let policy_rules_for_resolve = policy_rules.clone(); let resolve = iii.register_function(( diff --git a/approval-gate/src/state.rs b/approval-gate/src/state.rs index 0e21e381..9eaffadb 100644 --- a/approval-gate/src/state.rs +++ b/approval-gate/src/state.rs @@ -1,65 +1,18 @@ //! State-store and function-executor traits, plus their iii-backed -//! 
implementations and the `__from_approval` marker plumbing. +//! implementations. //! -//! The traits exist purely as test seams — unit tests swap in +//! The traits exist as test seams — unit tests swap in //! `InMemoryStateBus` / `FakeExecutor` while production code uses the -//! `Iii*` implementations that call iii directly. No new abstractions -//! beyond what's needed for that seam. - -use std::sync::Arc; +//! `Iii*` implementations that call iii directly. +//! +//! The `__from_approval` marker plumbing is gone (per the refactor's +//! threat-model decision: bus access ≡ shell access in the new model; +//! defense-in-depth via per-target marker verification is out of scope). use async_trait::async_trait; use iii_sdk::{IIIError, TriggerRequest, III}; use serde_json::{json, Value}; -use crate::config::InterceptorRule; - -/// Look up the [`InterceptorRule`] for `function_id`, if one is configured. -/// Pure helper; no I/O. Used by the gate's intercept flow and by the -/// production [`IiiFunctionExecutor`] to decide whether to inject the -/// `__from_approval` marker. -pub(crate) fn rule_for<'a>( - rules: &'a [InterceptorRule], - function_id: &str, -) -> Option<&'a InterceptorRule> { - rules.iter().find(|r| r.function_id == function_id) -} - -/// Stamp the `__from_approval` marker onto a function call's args when the -/// rule asks for it. The marker carries `{ call_id, session_id }` so the -/// target function can validate the call came through approval-gate (via -/// `approval::lookup_record`) instead of via direct trigger bypass. -/// -/// Idempotent on shape: object args get the marker merged in; null args -/// become `{ __from_approval: ... }`; any other shape (array, scalar) -/// gets wrapped as `{ payload, __from_approval: ... }` so it stays -/// recoverable on the target side. 
-pub(crate) fn merge_from_approval_marker_if_needed( - inject: bool, - args: Value, - function_call_id: &str, - session_id: &str, -) -> Value { - if !inject { - return args; - } - let marker = json!({ - "call_id": function_call_id, - "session_id": session_id, - }); - match args { - Value::Object(mut m) => { - m.insert("__from_approval".into(), marker); - Value::Object(m) - } - other if other.is_null() => json!({ "__from_approval": marker }), - other => json!({ - "payload": other, - "__from_approval": marker, - }), - } -} - /// Abstraction over the iii state bus — the kv layer where pending and /// resolved approval records live. Exists so unit tests can swap in a /// `BTreeMap`-backed fake; production uses [`IiiStateBus`]. @@ -88,9 +41,11 @@ pub trait FunctionExecutor: Send + Sync { } /// Production [`FunctionExecutor`] backed by `iii.trigger`. +/// +/// Forwards `function_id` + `args` directly to `iii.trigger`. No +/// `__from_approval` marker injection — the target trusts the bus. pub struct IiiFunctionExecutor { pub iii: III, - pub rules: Arc>, } #[async_trait] @@ -99,17 +54,13 @@ impl FunctionExecutor for IiiFunctionExecutor { &self, function_id: &str, args: Value, - function_call_id: &str, - session_id: &str, + _function_call_id: &str, + _session_id: &str, ) -> Result { - let inject = - rule_for(self.rules.as_slice(), function_id).is_some_and(|r| r.inject_approval_marker); - let payload = - merge_from_approval_marker_if_needed(inject, args, function_call_id, session_id); self.iii .trigger(TriggerRequest { function_id: function_id.to_string(), - payload, + payload: args, action: None, timeout_ms: None, }) @@ -181,92 +132,3 @@ impl StateBus for IiiStateBus { } } -/// Return the list of function ids whose interceptor asks the gate to -/// inject `__from_approval` without asserting that the target validates it. -/// Empty list ⇒ config is safe to register. Pure — exposed for tests and -/// for the boot-time check in `register`. 
-pub fn unverified_marker_targets(rules: &[InterceptorRule]) -> Vec<&str> { - rules - .iter() - .filter(|r| r.inject_approval_marker && !r.marker_target_verified) - .map(|r| r.function_id.as_str()) - .collect() -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - #[test] - fn merge_from_approval_inserts_marker_when_inject_true() { - let m = merge_from_approval_marker_if_needed( - true, - json!({"command": "git"}), - "call-1", - "sess-1", - ); - let inner = m.get("__from_approval").unwrap(); - assert_eq!(inner["call_id"], "call-1"); - assert_eq!(inner["session_id"], "sess-1"); - assert_eq!(m["command"], "git"); - } - - #[test] - fn merge_from_approval_noop_when_inject_false() { - let j = json!({"a": 1}); - let out = merge_from_approval_marker_if_needed(false, j.clone(), "c", "s"); - assert_eq!(out, j); - } - - #[test] - fn merge_from_approval_wraps_null_args_in_marker_only() { - let m = merge_from_approval_marker_if_needed(true, Value::Null, "c1", "s1"); - let obj = m.as_object().unwrap(); - assert_eq!(obj.len(), 1); - assert!(obj.contains_key("__from_approval")); - } - - #[test] - fn merge_from_approval_wraps_scalar_args_in_payload() { - let out = merge_from_approval_marker_if_needed(true, json!("scalar"), "c1", "s1"); - assert_eq!(out["payload"], json!("scalar")); - assert_eq!(out["__from_approval"]["call_id"], "c1"); - assert_eq!(out["__from_approval"]["session_id"], "s1"); - } - - #[test] - fn rule_for_returns_matching_rule() { - let rules = vec![ - InterceptorRule { - function_id: "shell::exec".into(), - classifier: Some("shell::classify_argv".into()), - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }, - InterceptorRule { - function_id: "other::fn".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - let r = rule_for(&rules, "shell::exec").expect("match"); - assert_eq!(r.classifier.as_deref(), 
Some("shell::classify_argv")); - assert!(r.inject_approval_marker); - } - - #[test] - fn rule_for_returns_none_when_absent() { - let rules = vec![InterceptorRule { - function_id: "x::y".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }]; - assert!(rule_for(&rules, "missing::id").is_none()); - } -} From 114cdf03ba96d278fe3c4c8a36ad6cca8c53d9f5 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 07:27:15 -0300 Subject: [PATCH 24/30] feat(approval-gate): strip config to topic+scope+timeout+rules (T12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit config.rs: WorkerConfig now has just {topic, approval_state_scope, default_timeout_ms, rules}. interceptors + sweeper_interval_ms fields gone. InterceptorRule struct kept as a no-op shim with minimal fields so the classifier-alias warning loop in register.rs still compiles — the loop is fed an empty Vec so the body never runs. iii.worker.yaml: replaced interceptor config with a curated default ruleset. Read-only fs/git auto-allowed; shell::exec/exec_bg ask; catch-all asks. Operators stack their own rules on top (last-match wins). register.rs cfg.interceptors → cfg.rules wiring confirmed: policy_rules is seeded from cfg.rules at startup and remains mutable via cascade. Note: InterceptorRule shim + classifier-alias warning loop in register will be deleted in a follow-up pass when the dependent code is fully cleaned up. They're cosmetic at this point — neither has functional effect. 
--- approval-gate/iii.worker.yaml | 47 +++++++++++--- approval-gate/src/config.rs | 117 ++++++++++------------------------ approval-gate/src/register.rs | 15 ++--- 3 files changed, 77 insertions(+), 102 deletions(-) diff --git a/approval-gate/iii.worker.yaml b/approval-gate/iii.worker.yaml index 48148c1f..c9983931 100644 --- a/approval-gate/iii.worker.yaml +++ b/approval-gate/iii.worker.yaml @@ -4,7 +4,12 @@ language: rust deploy: binary manifest: Cargo.toml bin: approval-gate -description: Hook subscriber on agent::before_function_call that pauses function calls listed in approval_required until the UI resolves them via approval::resolve. +description: | + Hook subscriber on agent::before_function_call. Decides every LLM-initiated + function call via a layered rules engine. Allow → pass through. Deny → + structured Denial::Policy. Ask → write a Pending record and wait for + approval::resolve. The classifier surface and __from_approval marker are gone; + policy lives entirely in the rules layer. runtime: kind: rust @@ -17,12 +22,34 @@ config: topic: agent::before_function_call approval_state_scope: approvals default_timeout_ms: 300000 - interceptors: - - function_id: shell::exec - classifier: shell::classify_argv - classifier_timeout_ms: 2000 - inject_approval_marker: true - - function_id: shell::exec_bg - classifier: shell::classify_argv - classifier_timeout_ms: 2000 - inject_approval_marker: true + + # Curated default ruleset. `before_function_call` fires for every tool + # call; with no rules and no-match defaulting to Ask, an empty ruleset + # would prompt for every read-only function. The defaults below + # auto-allow safe reads and ask for everything that writes/executes/ + # mutates. Operators stack their own rules on top — last-match wins. 
+ rules: + # Base layer — catch-all: anything no later rule matches → ask. + # Rules are evaluated last-match-wins, so overrides go BELOW this. + - { permission: "*", pattern: "*", action: ask } + + # Shell exec defaults to ask; the read-only git rules below override it + - { permission: "shell::exec", pattern: "*", action: ask } + - { permission: "shell::exec_bg", pattern: "*", action: ask } + + # Read-only git + - { permission: "shell::exec", pattern: "git status*", action: allow } + - { permission: "shell::exec", pattern: "git log*", action: allow } + - { permission: "shell::exec", pattern: "git diff*", action: allow } + - { permission: "shell::exec", pattern: "git show*", action: allow } + - { permission: "shell::exec", pattern: "git branch*", action: allow } + - { permission: "shell::exec", pattern: "git remote*", action: allow } + + # Read-only filesystem / introspection + - { permission: "fs::read", pattern: "*", action: allow } + - { permission: "fs::list", pattern: "*", action: allow } + - { permission: "fs::stat", pattern: "*", action: allow } + - { permission: "fs::glob", pattern: "*", action: allow } + - { permission: "fs::grep", pattern: "*", action: allow } + + # Approval API — the gate must not gate itself + - { permission: "approval::*", pattern: "*", action: allow } diff --git a/approval-gate/src/config.rs b/approval-gate/src/config.rs index 33f6b0c5..3c406f13 100644 --- a/approval-gate/src/config.rs +++ b/approval-gate/src/config.rs @@ -1,4 +1,14 @@ //! YAML-backed runtime settings for [`WorkerConfig`]. +//! +//! Post-refactor surface (T12): +//! - `topic` — hook bus topic the gate subscribes to. +//! - `approval_state_scope` — iii-state scope for approval records. +//! - `default_timeout_ms` — Pending-row TTL. +//! - `rules` — the layered ruleset (default + operator-shipped), +//! evaluated in order with last-match winning. +//! +//! Deleted in T12: `interceptors`, `sweeper_interval_ms`. `InterceptorRule` +//! survives only as a temporary shim (the classifier surface is gone).
use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; @@ -15,33 +25,16 @@ fn default_default_timeout_ms() -> u64 { 300_000 } -fn default_classifier_timeout_ms() -> u64 { - 2000 -} - -fn default_sweeper_interval_ms() -> u64 { - 2000 -} - -/// Per-function iii intercept rule: optional classifier trigger before pending + -/// optional `__from_approval` injection on post-resolve `iii.trigger`. -/// -/// `marker_target_verified` is the operator's explicit assertion that the -/// `function_id` target validates `__from_approval` against -/// `approval::lookup_record` on every invocation. When `inject_approval_marker` -/// is true, [`crate::register`] refuses to start unless this flag is also -/// true — closing the honor-system gap. +/// Temporary alias retained while register.rs's classifier-alias warning +/// loop still references the symbol. The struct is structurally unused +/// (no fields populated from config) and will be deleted alongside the +/// warning loop when there are no more callers. Provided here so the +/// crate builds. #[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] pub struct InterceptorRule { pub function_id: String, #[serde(default)] pub classifier: Option, - #[serde(default = "default_classifier_timeout_ms")] - pub classifier_timeout_ms: u64, - #[serde(default)] - pub inject_approval_marker: bool, - #[serde(default)] - pub marker_target_verified: bool, } #[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] @@ -52,14 +45,9 @@ pub struct WorkerConfig { pub approval_state_scope: String, #[serde(default = "default_default_timeout_ms")] pub default_timeout_ms: u64, - #[serde(default = "default_sweeper_interval_ms")] - pub sweeper_interval_ms: u64, - #[serde(default)] - pub interceptors: Vec, - /// Layered permission rules consulted before per-function interceptors. 
- /// `Allow` short-circuits to pass-through; `Deny` short-circuits to a - /// policy [`crate::Denial`]; `Ask` (and no-match) falls through to the - /// existing [`InterceptorRule`] flow. See [`crate::rules`]. + /// Layered permission ruleset. Allow / Deny / Ask actions. Evaluated + /// last-match-wins; the YAML's curated defaults ship at the bottom, + /// operator overrides stack on top. See [`crate::rules`]. #[serde(default)] pub rules: crate::rules::Ruleset, } @@ -70,8 +58,6 @@ impl Default for WorkerConfig { topic: default_topic(), approval_state_scope: default_approval_state_scope(), default_timeout_ms: default_default_timeout_ms(), - sweeper_interval_ms: default_sweeper_interval_ms(), - interceptors: Vec::new(), rules: Vec::new(), } } @@ -91,6 +77,7 @@ pub fn load_config(path: &str) -> Result { #[cfg(test)] mod tests { use super::*; + use crate::rules::{Action, Rule}; #[test] fn defaults_from_empty_yaml_mapping() { @@ -98,63 +85,27 @@ mod tests { assert_eq!(cfg.topic, default_topic()); assert_eq!(cfg.approval_state_scope, "approvals"); assert_eq!(cfg.default_timeout_ms, 300_000); - assert_eq!(cfg.sweeper_interval_ms, 2000); - assert!(cfg.interceptors.is_empty()); - } - - #[test] - fn marker_target_verified_defaults_false() { - let yaml = r#" -interceptors: - - function_id: shell::exec - inject_approval_marker: true -"#; - let cfg: WorkerConfig = serde_yaml::from_str(yaml).unwrap(); - assert!(cfg.interceptors[0].inject_approval_marker); - assert!(!cfg.interceptors[0].marker_target_verified); - } - - #[test] - fn interceptors_default_empty() { - assert!(WorkerConfig::default().interceptors.is_empty()); - } - - #[test] - fn interceptors_parse_from_nested_config_block() { - let yaml = r#" -interceptors: - - function_id: shell::exec - classifier: shell::classify_argv - classifier_timeout_ms: 1500 - inject_approval_marker: true - - function_id: other::fn - classifier: null -"#; - let cfg: WorkerConfig = serde_yaml::from_str(yaml).unwrap(); - 
assert_eq!(cfg.interceptors.len(), 2); - assert_eq!(cfg.interceptors[0].function_id, "shell::exec"); - assert_eq!( - cfg.interceptors[0].classifier.as_deref(), - Some("shell::classify_argv") - ); - assert_eq!(cfg.interceptors[0].classifier_timeout_ms, 1500); - assert!(cfg.interceptors[0].inject_approval_marker); - assert_eq!(cfg.interceptors[1].function_id, "other::fn"); - assert!(cfg.interceptors[1].classifier.is_none()); - assert!(!cfg.interceptors[1].inject_approval_marker); + assert!(cfg.rules.is_empty()); } #[test] - fn interceptor_rule_marker_defaults_false() { + fn rules_parse_from_yaml() { let yaml = r#" -interceptors: - - function_id: x::y - classifier: c::f +rules: + - { permission: "shell::exec", pattern: "git status*", action: allow } + - { permission: "shell::exec", pattern: "*", action: ask } "#; let cfg: WorkerConfig = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(cfg.interceptors.len(), 1); - assert!(!cfg.interceptors[0].inject_approval_marker); - assert_eq!(cfg.interceptors[0].classifier_timeout_ms, 2000); + assert_eq!(cfg.rules.len(), 2); + assert_eq!(cfg.rules[0].permission, "shell::exec"); + assert_eq!(cfg.rules[0].pattern, "git status*"); + assert_eq!(cfg.rules[0].action, Action::Allow); + assert_eq!(cfg.rules[1].action, Action::Ask); + let _ = Rule { // smoke check on the imported type + permission: "x".into(), + pattern: "*".into(), + action: Action::Deny, + }; } #[test] diff --git a/approval-gate/src/register.rs b/approval-gate/src/register.rs index 13d94811..a1487195 100644 --- a/approval-gate/src/register.rs +++ b/approval-gate/src/register.rs @@ -51,17 +51,14 @@ pub struct Refs { } pub fn register(iii: &III, cfg: &WorkerConfig) -> anyhow::Result { - let rules: Arc> = Arc::new(cfg.interceptors.clone()); - // Layered policy rules consulted before the per-function interceptor - // flow. 
Wrapped in RwLock so a user reply with `always: true` on - // `approval::resolve` can push a new Allow rule at runtime (see the - // cascade in `handle_resolve`). See [`crate::rules`]. + // Layered policy ruleset, wrapped in RwLock so cascade-on-`always:true` + // can push a runtime Allow rule (see resolve.rs::cascade_allow_for_session). let policy_rules: Arc> = Arc::new(RwLock::new(cfg.rules.clone())); - // T10: the boot-time marker-target-verified check is gone with the - // marker plumbing. InterceptorRule.classifier is also retired — the - // alias-warning loop below stays as a config-hygiene check until T12 - // strips InterceptorRule from config.rs entirely. + // Alias-warning loop retained only for backward source + // compatibility (no interceptors are configured anymore). The vec + // is always empty, so the loop body never runs. + let rules: Arc> = Arc::new(Vec::new()); for rule in rules.iter() { if let Some(cid) = rule.classifier.as_deref() { if cid == FN_LOOKUP_RECORD From ad2ed8e6ea890d1e375744700fe95d5afb948a64 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 09:10:39 -0300 Subject: [PATCH 25/30] feat(shell): strip classify_argv / allowlist / __from_approval marker (T13) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shell becomes a plain executor. All policy decisions live in approval-gate's rules layer (see the layered ruleset shipped in approval-gate/iii.worker.yaml).
Deleted: - shell/src/functions/approval_bypass.rs (marker validation module) - shell/src/functions/classify.rs (classifier handler) - shell/src/arity.rs (arity-aware allowlist matcher) - ApprovalMarker + ClassifyArgvRequest types - ExecRequest.from_approval + ExecBgRequest.from_approval fields - ShellConfig.allowlist + allow_any + denylist_patterns + compiled_denylist fields and their methods (compile_denylist, denylist_hit_reason, allowlist_contains, is_command_allowed) - shell::classify_argv function registration - shell::classify_argv manifest entry - All allowlist/denylist unit tests in shell/src/config.rs - One test in shell/tests/function_handlers.rs that asserted shell rejected unlisted commands Kept (independent surface): - fs::host_root jail + fs.denylist_paths (filesystem path-based denials for shell::fs::* tools; distinct from the exec-policy denylist) - The fs jail validate_fs_jail boot check Threat model implication, per the spec: bus access ≡ shell access. The harness's bus-level access control is the perimeter; any worker on the bus can call shell::exec with arbitrary args. Defense-in-depth via shell-side allowlists is gone for v1. 133 shell lib tests + 328 shell integration tests pass. 
--- shell/iii.worker.yaml | 2 +- shell/src/arity.rs | 361 ------------------------- shell/src/config.rs | 228 +--------------- shell/src/exec/host.rs | 1 - shell/src/functions/approval_bypass.rs | 138 ---------- shell/src/functions/classify.rs | 183 ------------- shell/src/functions/exec.rs | 61 +---- shell/src/functions/exec_bg.rs | 60 +--- shell/src/functions/mod.rs | 2 - shell/src/functions/types.rs | 44 +-- shell/src/lib.rs | 1 - shell/src/main.rs | 54 ++-- shell/src/manifest.rs | 6 +- shell/tests/function_handlers.rs | 27 +- 14 files changed, 56 insertions(+), 1112 deletions(-) delete mode 100644 shell/src/arity.rs delete mode 100644 shell/src/functions/approval_bypass.rs delete mode 100644 shell/src/functions/classify.rs diff --git a/shell/iii.worker.yaml b/shell/iii.worker.yaml index e4066dbc..ad5329ec 100644 --- a/shell/iii.worker.yaml +++ b/shell/iii.worker.yaml @@ -4,7 +4,7 @@ language: rust deploy: binary manifest: Cargo.toml bin: shell -description: Unix shell + filesystem worker — exec with allowlist/denylist/timeout/output caps and background jobs; fs::ls|stat|mkdir|rm|chmod|mv|grep|sed|read|write with host jail, denylist, size caps, and sandbox-target forwarding +description: Unix shell + filesystem worker — plain executor (policy lives in approval-gate's rules layer). exec with timeout/output caps + background jobs; fs::ls|stat|mkdir|rm|chmod|mv|grep|sed|read|write with host jail, path denylist, size caps, and sandbox-target forwarding # POSIX-only: src/fs/host.rs uses std::os::unix::fs::PermissionsExt / chown / # OpenOptionsExt directly, so the Windows triples cannot compile. diff --git a/shell/src/arity.rs b/shell/src/arity.rs deleted file mode 100644 index 8dab3c9f..00000000 --- a/shell/src/arity.rs +++ /dev/null @@ -1,361 +0,0 @@ -//! Bash-command arity dictionary, ported from opencode's -//! `packages/opencode/src/permission/arity.ts`. -//! -//! Maps a command prefix string (e.g. `"git"`, `"npm run"`, `"docker compose"`) -//! 
to the number of tokens that constitute its human-meaningful identity. -//! Used by [`crate::config::ShellConfig::allowlist_contains`] so operators -//! can write `"git checkout"` or `"npm run dev"` as multi-token allowlist -//! entries and have them match the right slice of an incoming argv. -//! -//! Rules (from the upstream prompt): -//! 1. Each entry is a command-prefix string → token count. -//! 2. Flags never count as tokens; only subcommands do. -//! 3. Longest matching prefix wins. -//! 4. Only include a longer prefix when its arity differs from what the -//! shorter prefix already implies (e.g. `git` arity 2 implies -//! `git checkout` arity 2, so `git checkout` is omitted; `git config` -//! arity 3 IS included because it differs). - -use std::collections::HashMap; -use std::sync::OnceLock; - -fn arity_table() -> &'static HashMap<&'static str, usize> { - static TABLE: OnceLock> = OnceLock::new(); - TABLE.get_or_init(|| { - let entries: &[(&str, usize)] = &[ - ("cat", 1), - ("cd", 1), - ("chmod", 1), - ("chown", 1), - ("cp", 1), - ("echo", 1), - ("env", 1), - ("export", 1), - ("grep", 1), - ("kill", 1), - ("killall", 1), - ("ln", 1), - ("ls", 1), - ("mkdir", 1), - ("mv", 1), - ("ps", 1), - ("pwd", 1), - ("rm", 1), - ("rmdir", 1), - ("sleep", 1), - ("source", 1), - ("tail", 1), - ("touch", 1), - ("unset", 1), - ("which", 1), - ("aws", 3), - ("az", 3), - ("bazel", 2), - ("brew", 2), - ("bun", 2), - ("bun run", 3), - ("bun x", 3), - ("cargo", 2), - ("cargo add", 3), - ("cargo run", 3), - ("cdk", 2), - ("cf", 2), - ("cmake", 2), - ("composer", 2), - ("consul", 2), - ("consul kv", 3), - ("crictl", 2), - ("deno", 2), - ("deno task", 3), - ("doctl", 3), - ("docker", 2), - ("docker builder", 3), - ("docker compose", 3), - ("docker container", 3), - ("docker image", 3), - ("docker network", 3), - ("docker volume", 3), - ("eksctl", 2), - ("eksctl create", 3), - ("firebase", 2), - ("flyctl", 2), - ("gcloud", 3), - ("gh", 3), - ("git", 2), - ("git config", 3), - ("git 
remote", 3), - ("git stash", 3), - ("go", 2), - ("gradle", 2), - ("helm", 2), - ("heroku", 2), - ("hugo", 2), - ("ip", 2), - ("ip addr", 3), - ("ip link", 3), - ("ip netns", 3), - ("ip route", 3), - ("kind", 2), - ("kind create", 3), - ("kubectl", 2), - ("kubectl kustomize", 3), - ("kubectl rollout", 3), - ("kustomize", 2), - ("make", 2), - ("mc", 2), - ("mc admin", 3), - ("minikube", 2), - ("mongosh", 2), - ("mysql", 2), - ("mvn", 2), - ("ng", 2), - ("npm", 2), - ("npm exec", 3), - ("npm init", 3), - ("npm run", 3), - ("npm view", 3), - ("nvm", 2), - ("nx", 2), - ("openssl", 2), - ("openssl req", 3), - ("openssl x509", 3), - ("pip", 2), - ("pipenv", 2), - ("pnpm", 2), - ("pnpm dlx", 3), - ("pnpm exec", 3), - ("pnpm run", 3), - ("poetry", 2), - ("podman", 2), - ("podman container", 3), - ("podman image", 3), - ("psql", 2), - ("pulumi", 2), - ("pulumi stack", 3), - ("pyenv", 2), - ("python", 2), - ("rake", 2), - ("rbenv", 2), - ("redis-cli", 2), - ("rustup", 2), - ("serverless", 2), - ("sfdx", 3), - ("skaffold", 2), - ("sls", 2), - ("sst", 2), - ("swift", 2), - ("systemctl", 2), - ("terraform", 2), - ("terraform workspace", 3), - ("tmux", 2), - ("turbo", 2), - ("ufw", 2), - ("vault", 2), - ("vault auth", 3), - ("vault kv", 3), - ("vercel", 2), - ("volta", 2), - ("wp", 2), - ("yarn", 2), - ("yarn dlx", 3), - ("yarn run", 3), - ]; - entries.iter().copied().collect() - }) -} - -/// Return the human-meaningful command-identity prefix of `argv`. -/// -/// Walks lengths from longest to shortest looking for the joined prefix in -/// the [`arity_table`]. On a hit, returns `argv[..arity]` (clamped to the -/// argv length so we never panic on a too-short input that happens to match -/// a longer-arity prefix). On a miss, returns `argv[..1]` — the single -/// program-name token — or an empty slice if argv was empty. -/// -/// Matches the semantics of opencode's `prefix()` exactly so the rule -/// surface is portable between the two implementations. 
-pub fn prefix(argv: &[String]) -> Vec { - let table = arity_table(); - for len in (1..=argv.len()).rev() { - let candidate = argv[..len].join(" "); - if let Some(&arity) = table.get(candidate.as_str()) { - let take = arity.min(argv.len()); - return argv[..take].to_vec(); - } - } - if argv.is_empty() { - Vec::new() - } else { - argv[..1].to_vec() - } -} - -/// Normalize argv[0] from a full path (e.g. `/usr/bin/ls`) to its basename -/// before arity matching. Preserves the rest of argv untouched. Used so -/// allowlist matching is path-agnostic (existing behavior of -/// [`crate::config::ShellConfig::allowlist_contains`]). -pub fn normalize_argv_head(argv: &[String]) -> Vec { - let mut out: Vec = argv.to_vec(); - if let Some(first) = out.first_mut() { - if let Some(base) = std::path::Path::new(first.as_str()) - .file_name() - .and_then(|s| s.to_str()) - { - *first = base.to_string(); - } - } - out -} - -/// True iff the arity-aware prefix of `argv` matches `entry`. An entry can -/// be a single token (`"ls"`) or a multi-token prefix (`"git checkout"`, -/// `"npm run dev"`); the match is token-aligned (so `"git"` does not match -/// argv beginning with `git-lfs`). -pub fn prefix_matches(argv: &[String], entry: &str) -> bool { - let normalized = normalize_argv_head(argv); - let pfx = prefix(&normalized); - if pfx.is_empty() { - return false; - } - let joined = pfx.join(" "); - if joined == entry { - return true; - } - // Token-aligned prefix match: the entry covers a leading subset of the - // prefix tokens. Compare token-by-token to avoid false positives on - // substrings (e.g. entry "git" should match prefix "git checkout" but - // not "git-lfs push" — already filtered by the basename step above). 
- let entry_tokens: Vec<&str> = entry.split_whitespace().collect(); - if entry_tokens.is_empty() || entry_tokens.len() > pfx.len() { - return false; - } - entry_tokens - .iter() - .zip(pfx.iter()) - .all(|(e, p)| *e == p.as_str()) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn s(items: &[&str]) -> Vec { - items.iter().map(|s| s.to_string()).collect() - } - - #[test] - fn empty_argv_returns_empty_prefix() { - assert!(prefix(&[]).is_empty()); - } - - #[test] - fn unknown_command_returns_first_token() { - assert_eq!(prefix(&s(&["foobar", "--flag"])), s(&["foobar"])); - } - - #[test] - fn single_token_arity_one_command() { - assert_eq!(prefix(&s(&["ls", "-la"])), s(&["ls"])); - assert_eq!(prefix(&s(&["touch", "x.txt"])), s(&["touch"])); - } - - #[test] - fn git_subcommand_picked_up_via_arity_two() { - assert_eq!( - prefix(&s(&["git", "checkout", "main"])), - s(&["git", "checkout"]) - ); - assert_eq!(prefix(&s(&["git", "commit", "-am", "x"])), s(&["git", "commit"])); - } - - #[test] - fn longer_arity_wins_when_present() { - // "npm run" is arity 3 even though "npm" is arity 2. - assert_eq!( - prefix(&s(&["npm", "run", "dev", "--watch"])), - s(&["npm", "run", "dev"]) - ); - } - - #[test] - fn arity_clamps_when_argv_too_short() { - // "git" wants arity 2 but argv only has 1 token. Don't panic; return - // what we have. 
- assert_eq!(prefix(&s(&["git"])), s(&["git"])); - } - - #[test] - fn docker_compose_multitoken_prefix() { - assert_eq!( - prefix(&s(&["docker", "compose", "up", "-d"])), - s(&["docker", "compose", "up"]) - ); - } - - #[test] - fn unknown_command_with_no_args() { - assert_eq!(prefix(&s(&["netstat"])), s(&["netstat"])); - } - - #[test] - fn normalize_strips_path_from_head() { - let argv = s(&["/usr/bin/ls", "-la"]); - assert_eq!(normalize_argv_head(&argv), s(&["ls", "-la"])); - } - - #[test] - fn normalize_leaves_bare_command_alone() { - let argv = s(&["ls", "-la"]); - assert_eq!(normalize_argv_head(&argv), s(&["ls", "-la"])); - } - - #[test] - fn prefix_matches_single_token_entry() { - assert!(prefix_matches(&s(&["ls", "-la"]), "ls")); - assert!(!prefix_matches(&s(&["lsattr"]), "ls")); - } - - #[test] - fn prefix_matches_multi_token_entry() { - assert!(prefix_matches( - &s(&["git", "checkout", "main"]), - "git checkout" - )); - assert!(!prefix_matches( - &s(&["git", "rebase", "-i"]), - "git checkout" - )); - } - - #[test] - fn prefix_matches_shorter_entry_against_longer_prefix() { - // entry "git" should match argv that resolves to ["git", "checkout"] - assert!(prefix_matches(&s(&["git", "checkout", "main"]), "git")); - } - - #[test] - fn prefix_matches_handles_full_path_argv_head() { - assert!(prefix_matches(&s(&["/usr/bin/ls", "-la"]), "ls")); - assert!(prefix_matches( - &s(&["/opt/homebrew/bin/git", "checkout", "main"]), - "git checkout" - )); - } - - #[test] - fn prefix_does_not_match_token_boundary_collision() { - // basename normalization makes "git-lfs" survive as its own token, - // so entry "git" cannot match argv ["git-lfs", "push"]. 
- assert!(!prefix_matches(&s(&["git-lfs", "push"]), "git")); - } - - #[test] - fn prefix_matches_returns_false_for_empty_argv() { - assert!(!prefix_matches(&[], "ls")); - } - - #[test] - fn prefix_matches_returns_false_for_empty_entry() { - assert!(!prefix_matches(&s(&["ls"]), "")); - } -} diff --git a/shell/src/config.rs b/shell/src/config.rs index 228bad3a..a6e0da31 100644 --- a/shell/src/config.rs +++ b/shell/src/config.rs @@ -1,9 +1,11 @@ use anyhow::{Context, Result}; -use regex::Regex; use serde::{Deserialize, Serialize}; use std::fs; use std::path::PathBuf; +/// Shell worker config. Post-T13 this is just execution-runtime tunables — +/// allowlist / denylist / allow_any / compiled regex live in the +/// approval-gate's rules layer, not here. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ShellConfig { #[serde(default = "default_max_timeout_ms")] @@ -24,20 +26,6 @@ pub struct ShellConfig { #[serde(default = "default_allowed_env")] pub allowed_env: Vec, - #[serde(default)] - pub allowlist: Vec, - - /// Bypass the allowlist-miss-prompts-user behavior. When `true`, every - /// non-denylisted command is auto-approved on the classifier path. - /// Denylist still wins. Default `false` (fail-closed). 
- /// - /// Spec: docs/superpowers/specs/2026-05-15-shell-allowlist-approval-design.md § 6.5 - #[serde(default)] - pub allow_any: bool, - - #[serde(default)] - pub denylist_patterns: Vec, - #[serde(default = "default_max_concurrent_jobs")] pub max_concurrent_jobs: usize, @@ -49,9 +37,6 @@ pub struct ShellConfig { #[serde(default)] pub sandbox: SandboxConfig, - - #[serde(default, skip)] - pub compiled_denylist: Vec, } fn default_max_timeout_ms() -> u64 { @@ -141,101 +126,38 @@ impl Default for ShellConfig { working_dir: None, inherit_env: false, allowed_env: default_allowed_env(), - allowlist: Vec::new(), - allow_any: false, - denylist_patterns: Vec::new(), max_concurrent_jobs: default_max_concurrent_jobs(), job_retention_secs: default_job_retention_secs(), fs: FsConfig::default(), sandbox: SandboxConfig::default(), - compiled_denylist: Vec::new(), } } } pub fn load_config(path: &str) -> Result { let content = fs::read_to_string(path).with_context(|| format!("read {}", path))?; - let mut cfg: ShellConfig = + let cfg: ShellConfig = serde_yaml::from_str(&content).with_context(|| format!("parse {}", path))?; - cfg.compile_denylist()?; cfg.validate_fs_jail()?; Ok(cfg) } impl ShellConfig { - pub fn compile_denylist(&mut self) -> Result<()> { - self.compiled_denylist = self - .denylist_patterns - .iter() - .map(|p| Regex::new(p).with_context(|| format!("bad denylist pattern: {}", p))) - .collect::>>()?; - Ok(()) - } - /// Refuse to start with the host backend exposing the entire filesystem - /// behind only the (advisory) denylist — the operator must either pin a - /// host_root jail or explicitly opt in via `fs.allow_unjailed: true`. + /// unjailed — the operator must either pin a host_root jail or + /// explicitly opt in via `fs.allow_unjailed: true`. pub fn validate_fs_jail(&self) -> Result<()> { if self.fs.host_root.is_none() && !self.fs.allow_unjailed { anyhow::bail!( "fs.host_root is unset and fs.allow_unjailed is false — refusing to start \ unjailed. 
Set fs.host_root to a directory you intend to expose, or set \ fs.allow_unjailed: true to accept that the entire host filesystem is \ - reachable through shell::fs::* (subject only to the advisory denylist)." + reachable through shell::fs::*." ); } Ok(()) } - /// Returns `Some(reason)` if joined argv matches any compiled denylist regex. - /// Pure predicate; no allowlist consultation. - pub fn denylist_hit_reason(&self, argv: &[String]) -> Option { - let joined = argv.join(" "); - for re in &self.compiled_denylist { - if re.is_match(&joined) { - return Some(format!("command matches denylist: {}", re.as_str())); - } - } - None - } - - /// Returns `true` if the arity-aware prefix of `argv` matches any - /// entry in `allowlist`. Entries can be single tokens (`"ls"`) or - /// multi-token prefixes (`"git checkout"`, `"npm run dev"`); the - /// match is token-aligned via [`crate::arity::prefix_matches`] - /// so `"git"` matches argv beginning with `git ` but - /// not `git-lfs`. Full-path argv heads (e.g. `/usr/bin/ls`) are - /// normalized to their basename before matching, preserving the - /// pre-arity path-agnostic behavior. Empty allowlist returns - /// `false` (caller decides what to do with that). - pub fn allowlist_contains(&self, argv: &[String]) -> bool { - if argv.is_empty() || self.allowlist.is_empty() { - return false; - } - self.allowlist - .iter() - .any(|entry| crate::arity::prefix_matches(argv, entry)) - } - - /// Today's combined check, preserved unchanged on the wire for direct - /// (non-agent) callers. Empty allowlist = open. Denylist always wins. - /// Agent calls bypass this via the approval-gate classifier path - /// (see docs/superpowers/specs/2026-05-15-shell-allowlist-approval-design.md § 6.5). 
- pub fn is_command_allowed(&self, argv: &[String]) -> Result<(), String> { - let cmd = argv.first().ok_or_else(|| "empty command".to_string())?; - if let Some(reason) = self.denylist_hit_reason(argv) { - return Err(reason); - } - if !self.allowlist.is_empty() && !self.allowlist_contains(argv) { - let base = std::path::Path::new(cmd) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or(cmd); - return Err(format!("command '{}' not in allowlist", base)); - } - Ok(()) - } - pub fn resolve_timeout(&self, requested: Option) -> u64 { let t = requested.unwrap_or(self.default_timeout_ms); t.min(self.max_timeout_ms) @@ -246,16 +168,6 @@ impl ShellConfig { mod tests { use super::*; - fn cfg_with(allow: Vec<&str>, deny: Vec<&str>) -> ShellConfig { - let mut c = ShellConfig { - allowlist: allow.into_iter().map(String::from).collect(), - denylist_patterns: deny.into_iter().map(String::from).collect(), - ..Default::default() - }; - c.compile_denylist().unwrap(); - c - } - #[test] fn test_defaults() { let c = ShellConfig::default(); @@ -265,122 +177,6 @@ mod tests { assert_eq!(c.max_concurrent_jobs, 16); } - #[test] - fn test_allowlist_permits() { - let c = cfg_with(vec!["ls", "cat"], vec![]); - assert!(c.is_command_allowed(&["ls".into(), "-la".into()]).is_ok()); - } - - #[test] - fn test_allowlist_rejects() { - let c = cfg_with(vec!["ls"], vec![]); - let err = c - .is_command_allowed(&["nmap".into()]) - .expect_err("must reject"); - assert!(err.contains("not in allowlist")); - } - - #[test] - fn test_allowlist_empty_means_open() { - let c = cfg_with(vec![], vec![]); - assert!(c.is_command_allowed(&["anything".into()]).is_ok()); - } - - #[test] - fn test_allowlist_basename_match() { - let c = cfg_with(vec!["ls"], vec![]); - assert!(c - .is_command_allowed(&["/usr/bin/ls".into(), "-la".into()]) - .is_ok()); - } - - #[test] - fn allowlist_arity_single_token_entry_matches_subcommand_argv() { - // Allowlisting just "git" should auto-approve `git checkout main` - // because the 
arity dictionary resolves `git` at arity 2. - let c = cfg_with(vec!["git"], vec![]); - assert!(c - .is_command_allowed(&["git".into(), "checkout".into(), "main".into()]) - .is_ok()); - } - - #[test] - fn allowlist_arity_multi_token_entry_matches() { - // Allowlisting `git checkout` should match exactly that subcommand - // and not other git subcommands. - let c = cfg_with(vec!["git checkout"], vec![]); - assert!(c - .is_command_allowed(&["git".into(), "checkout".into(), "main".into()]) - .is_ok()); - let err = c - .is_command_allowed(&["git".into(), "push".into()]) - .expect_err("git push must be rejected when only git checkout is allowed"); - assert!(err.contains("allowlist")); - } - - #[test] - fn allowlist_arity_npm_run_dev_three_token_entry() { - let c = cfg_with(vec!["npm run dev"], vec![]); - assert!(c - .is_command_allowed(&["npm".into(), "run".into(), "dev".into(), "--watch".into()]) - .is_ok()); - let err = c - .is_command_allowed(&["npm".into(), "run".into(), "build".into()]) - .expect_err("npm run build must be rejected when only npm run dev is allowed"); - assert!(err.contains("allowlist")); - } - - #[test] - fn allowlist_arity_does_not_collide_on_hyphenated_token() { - // Allowlisting `git` must not auto-approve `git-lfs push` — the - // basename token boundary protects against substring confusion. 
- let c = cfg_with(vec!["git"], vec![]); - let err = c - .is_command_allowed(&["git-lfs".into(), "push".into()]) - .expect_err("git-lfs must not match an allowlist entry of 'git'"); - assert!(err.contains("allowlist")); - } - - #[test] - fn test_denylist_blocks() { - let c = cfg_with(vec![], vec![r"rm\s+-rf\s+/"]); - let err = c - .is_command_allowed(&["rm".into(), "-rf".into(), "/".into()]) - .expect_err("must reject"); - assert!(err.contains("denylist")); - } - - #[test] - fn test_empty_argv_rejected() { - let c = ShellConfig::default(); - assert!(c.is_command_allowed(&[]).is_err()); - } - - /// Loads the shipped `config.yaml` and asserts the default allowlist - /// preserves read-only env inspection (`printenv`) while rejecting the - /// `env ` exec-escape. `env` was removed from the default allowlist - /// because `is_command_allowed` only checks argv[0]; with `env` - /// allowlisted, `env nmap target` would have argv[0]=="env" and pass. - /// Loads the shipped `config.yaml` and asserts the default allowlist - /// preserves read-only env inspection (`printenv`) while rejecting the - /// `env ` exec-escape. `env` was removed from the default allowlist - /// because `is_command_allowed` only checks argv[0]; with `env` - /// allowlisted, `env nmap target` would have argv[0]=="env" and pass. - /// Parses the YAML directly (skipping `load_config`'s fs-jail check, - /// which is unrelated to the allowlist policy under test). 
- #[test] - fn shipped_config_blocks_env_exec_escape() { - let path = concat!(env!("CARGO_MANIFEST_DIR"), "/config.yaml"); - let content = fs::read_to_string(path).expect("read config.yaml"); - let mut c: ShellConfig = serde_yaml::from_str(&content).expect("config.yaml parses"); - c.compile_denylist().expect("denylist compiles"); - assert!(c.is_command_allowed(&["printenv".into()]).is_ok()); - let err = c - .is_command_allowed(&["env".into(), "nmap".into(), "host".into()]) - .expect_err("env must be rejected"); - assert!(err.contains("not in allowlist")); - } - #[test] fn test_resolve_timeout_caps_at_max() { let c = ShellConfig::default(); @@ -401,7 +197,6 @@ mod tests { #[test] fn yaml_with_fs_section_parses() { let yaml = r#" -allowlist: [] fs: host_root: /tmp/shell max_read_bytes: 1024 @@ -420,15 +215,6 @@ sandbox: assert_eq!(c.fs.denylist_paths.len(), 1); } - #[test] - fn missing_fs_section_uses_defaults() { - let yaml = "allowlist: []\n"; - let c: ShellConfig = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(c.fs.max_read_bytes, 0); - assert_eq!(c.fs.max_write_bytes, 0); - assert!(c.sandbox.enabled); - } - #[test] fn validate_fs_jail_rejects_default_unjailed_config() { let c = ShellConfig::default(); diff --git a/shell/src/exec/host.rs b/shell/src/exec/host.rs index 588cc5b3..9f55451a 100644 --- a/shell/src/exec/host.rs +++ b/shell/src/exec/host.rs @@ -159,7 +159,6 @@ mod tests { max_output_bytes: 4096, ..Default::default() }; - c.compile_denylist().unwrap(); c } diff --git a/shell/src/functions/approval_bypass.rs b/shell/src/functions/approval_bypass.rs deleted file mode 100644 index 9c731d72..00000000 --- a/shell/src/functions/approval_bypass.rs +++ /dev/null @@ -1,138 +0,0 @@ -//! Validation for `__from_approval` on `shell::exec` / `shell::exec_bg`. 
- -use serde_json::Value; - -use crate::functions::types::ApprovalMarker; - -pub(crate) fn marker_wellformed(marker: &ApprovalMarker) -> Result<(), String> { - if marker.call_id.trim().is_empty() || marker.session_id.trim().is_empty() { - return Err("__from_approval marker malformed".into()); - } - Ok(()) -} - -/// Normalize `record.args` object (`command` + optional `args` tail) for argv binding. -fn normalized_command_args(stored_args: &Value) -> Result<(String, Option>), String> { - let obj = stored_args - .as_object() - .ok_or_else(|| "__from_approval approved record has invalid args shape".to_string())?; - let cmd = obj - .get("command") - .and_then(Value::as_str) - .ok_or_else(|| "__from_approval approved record args missing command".to_string())? - .to_string(); - let tail = match obj.get("args") { - None | Some(Value::Null) => None, - Some(Value::Array(arr)) => { - let mut v = Vec::with_capacity(arr.len()); - for x in arr { - let s = x - .as_str() - .ok_or_else(|| "__from_approval record args.args must be strings".to_string())? 
- .to_string(); - v.push(s); - } - Some(v) - } - Some(_) => { - return Err("__from_approval record args.args must be array or null".into()); - } - }; - Ok((cmd, tail)) -} - -pub(crate) fn validate_approved_record_for_bypass( - record: &Value, - handler_function_id: &str, - command: &str, - args: &Option>, -) -> Result<(), String> { - let fid = record - .get("function_id") - .and_then(Value::as_str) - .unwrap_or(""); - if fid != handler_function_id { - return Err("__from_approval marker bound to different function_id".into()); - } - let status = record.get("status").and_then(Value::as_str).unwrap_or(""); - if status != "approved" { - return Err(format!( - "__from_approval marker for non-approved record (status: {status})" - )); - } - let stored_root = record.get("args").ok_or_else(|| { - "__from_approval marker without valid pending approval record".to_string() - })?; - let (stored_cmd, stored_tail) = normalized_command_args(stored_root)?; - if stored_cmd != command { - return Err("__from_approval marker argv mismatch with approved call".into()); - } - match (&stored_tail, args) { - (None, None) => Ok(()), - (Some(a), Some(b)) if a == b => Ok(()), - _ => Err("__from_approval marker argv mismatch with approved call".into()), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - #[test] - fn rejects_mismatched_command() { - let rec = json!({ - "function_id": "shell::exec", - "status": "approved", - "args": {"command": "netstat", "args": ["-an"]} - }); - let err = validate_approved_record_for_bypass( - &rec, - "shell::exec", - "cat", - &Some(vec!["/etc/passwd".into()]), - ) - .expect_err("argv mismatch"); - assert!(err.contains("mismatch")); - } - - #[test] - fn rejects_wrong_function_id() { - let rec = json!({ - "function_id": "shell::exec", - "status": "approved", - "args": {"command": "echo"} - }); - let err = validate_approved_record_for_bypass(&rec, "shell::exec_bg", "echo", &None) - .expect_err("fid mismatch"); - 
assert!(err.contains("different function_id")); - } - - #[test] - fn rejects_non_approved_status() { - let rec = json!({ - "function_id": "shell::exec", - "status": "executed", - "args": {"command": "echo"} - }); - let err = validate_approved_record_for_bypass(&rec, "shell::exec", "echo", &None) - .expect_err("status"); - assert!(err.contains("non-approved")); - } - - #[test] - fn accepts_matching_payload() { - let rec = json!({ - "function_id": "shell::exec", - "status": "approved", - "args": {"command": "echo", "args": ["hi"]} - }); - validate_approved_record_for_bypass( - &rec, - "shell::exec", - "echo", - &Some(vec!["hi".into()]), - ) - .unwrap(); - } -} diff --git a/shell/src/functions/classify.rs b/shell/src/functions/classify.rs deleted file mode 100644 index 1dc2015f..00000000 --- a/shell/src/functions/classify.rs +++ /dev/null @@ -1,183 +0,0 @@ -//! `shell::classify_argv` — classifier path for the approval gate (agent traffic). -//! See `docs/superpowers/specs/2026-05-15-shell-allowlist-approval-design.md` § 4–6.1. - -use std::sync::Arc; - -use serde::Serialize; -use schemars::JsonSchema; - -use crate::config::ShellConfig; -use crate::exec::host::parse_argv; -use crate::functions::types::ClassifyArgvRequest; - -/// Internal outcome before JSON tagging for the wire. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ClassifyOutcome { - Auto, - Deny { reason: String }, - Ask { summary: String }, -} - -/// Serialize as `{ "decision": "auto" | "deny" | "ask", ... }`. 
-#[derive(Debug, Serialize, JsonSchema)] -#[serde(tag = "decision", rename_all = "lowercase")] -pub enum ClassifyWireResponse { - Auto, - Deny { reason: String }, - Ask { summary: String }, -} - -impl From for ClassifyWireResponse { - fn from(o: ClassifyOutcome) -> Self { - match o { - ClassifyOutcome::Auto => ClassifyWireResponse::Auto, - ClassifyOutcome::Deny { reason } => ClassifyWireResponse::Deny { reason }, - ClassifyOutcome::Ask { summary } => ClassifyWireResponse::Ask { summary }, - } - } -} - -pub(crate) const SUMMARY_MAX: usize = 512; - -pub fn summarize_argv(argv: &[String]) -> String { - let joined = argv.join(" "); - if joined.len() <= SUMMARY_MAX { - joined - } else { - let mut s = joined.chars().take(SUMMARY_MAX).collect::(); - s.push_str("… (truncated)"); - s - } -} - -/// Pure classifier for the agent path: denylist → allow_any → allowlist → ask. -/// Empty allowlist does **not** auto-approve here (unlike [`ShellConfig::is_command_allowed`]). -pub fn classify_agent_path(cfg: &ShellConfig, argv: &[String]) -> ClassifyOutcome { - if argv.is_empty() { - return ClassifyOutcome::Deny { - reason: "empty command".into(), - }; - } - if let Some(reason) = cfg.denylist_hit_reason(argv) { - return ClassifyOutcome::Deny { reason }; - } - if cfg.allow_any { - return ClassifyOutcome::Auto; - } - if cfg.allowlist_contains(argv) { - return ClassifyOutcome::Auto; - } - ClassifyOutcome::Ask { - summary: summarize_argv(argv), - } -} - -pub async fn handle( - cfg: Arc, - req: ClassifyArgvRequest, -) -> Result { - let argv = parse_argv(&req.command, req.args.as_ref())?; - Ok(classify_agent_path(cfg.as_ref(), &argv).into()) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::ShellConfig; - - fn cfg_allow_deny(allow: Vec<&str>, deny: Vec<&str>) -> ShellConfig { - let mut c = ShellConfig { - allowlist: allow.into_iter().map(String::from).collect(), - denylist_patterns: deny.into_iter().map(String::from).collect(), - ..Default::default() - }; - 
c.compile_denylist().unwrap(); - c - } - - #[test] - fn empty_allowlist_empty_denylist_asks() { - let c = cfg_allow_deny(vec![], vec![]); - let out = classify_agent_path(&c, &["anything".into()]); - assert!(matches!(out, ClassifyOutcome::Ask { .. })); - } - - #[test] - fn denylist_wins_on_empty_allowlist() { - let c = cfg_allow_deny(vec![], vec![r"rm\s+-rf\s+/"]); - match classify_agent_path(&c, &["rm".into(), "-rf".into(), "/".into()]) { - ClassifyOutcome::Deny { reason } => assert!(reason.contains("denylist")), - other => panic!("expected Deny, got {other:?}"), - } - } - - #[test] - fn allowlisted_auto() { - let c = cfg_allow_deny(vec!["ls", "cat"], vec![]); - assert!(matches!( - classify_agent_path(&c, &["ls".into(), "-la".into()]), - ClassifyOutcome::Auto - )); - } - - #[test] - fn allowlist_miss_asks_with_summary() { - let c = cfg_allow_deny(vec!["ls"], vec![]); - match classify_agent_path(&c, &["netstat".into(), "-an".into()]) { - ClassifyOutcome::Ask { summary } => assert_eq!(summary, "netstat -an"), - other => panic!("expected Ask, got {other:?}"), - } - } - - #[test] - fn denylist_wins_over_allowlist() { - let c = cfg_allow_deny(vec!["rm"], vec![r"rm\s+-rf\s+/"]); - match classify_agent_path(&c, &["rm".into(), "-rf".into(), "/".into()]) { - ClassifyOutcome::Deny { .. 
} => {} - other => panic!("expected Deny, got {other:?}"), - } - } - - #[test] - fn basename_allowlisted() { - let c = cfg_allow_deny(vec!["ls"], vec![]); - assert!(matches!( - classify_agent_path(&c, &["/usr/bin/ls".into(), "-la".into()]), - ClassifyOutcome::Auto - )); - } - - #[test] - fn summary_truncates_long_argv() { - let c = cfg_allow_deny(vec![], vec![]); - let arg = "a".repeat(300); - let argv = vec!["cmd".into(), arg.clone(), arg.clone()]; - match classify_agent_path(&c, &argv) { - ClassifyOutcome::Ask { summary } => { - assert!(summary.ends_with("… (truncated)")); - assert!(summary.len() <= SUMMARY_MAX + "… (truncated)".len()); - } - other => panic!("expected Ask, got {other:?}"), - } - } - - #[test] - fn allow_any_skips_allowlist_when_not_denylisted() { - let mut c = cfg_allow_deny(vec![], vec![]); - c.allow_any = true; - assert!(matches!( - classify_agent_path(&c, &["netstat".into()]), - ClassifyOutcome::Auto - )); - } - - #[test] - fn allow_any_still_loses_to_denylist() { - let mut c = cfg_allow_deny(vec![], vec![r"bad"]); - c.allow_any = true; - assert!(matches!( - classify_agent_path(&c, &["badcmd".into()]), - ClassifyOutcome::Deny { .. 
} - )); - } -} diff --git a/shell/src/functions/exec.rs b/shell/src/functions/exec.rs index bd46fd31..0cf36457 100644 --- a/shell/src/functions/exec.rs +++ b/shell/src/functions/exec.rs @@ -1,77 +1,24 @@ use std::sync::Arc; -use iii_sdk::TriggerRequest; -use serde_json::{json, Value}; - use crate::config::ShellConfig; use crate::exec::host::parse_argv; use crate::exec_dispatch::{err_to_string, pick_exec_backend}; -use crate::functions::approval_bypass::{marker_wellformed, validate_approved_record_for_bypass}; use crate::functions::types::{ExecRequest, ExecResponse}; -const FN_APPROVAL_LOOKUP_RECORD: &str = "approval::lookup_record"; - -async fn fetch_approval_record(iii: &iii_sdk::III, session_id: &str, call_id: &str) -> Result { - let v = iii - .trigger(TriggerRequest { - function_id: FN_APPROVAL_LOOKUP_RECORD.into(), - payload: json!({ - "session_id": session_id, - "function_call_id": call_id, - }), - action: None, - timeout_ms: Some(10_000), - }) - .await - .map_err(|e| e.to_string())?; - if v.is_null() { - Err("__from_approval marker without valid pending approval record".into()) - } else { - Ok(v) - } -} - pub async fn handle( cfg: Arc, iii: iii_sdk::III, req: ExecRequest, ) -> Result { - // Field-level type errors (wrong-type `command`, non-string `args[i]`, - // bad `target.kind`) come from the per-field deserializers in - // `functions::types`; they surface here as the trigger `Err` carrying - // the actionable text the LLM needs to self-correct. - // `args.as_ref()` preserves the legacy two-mode contract on `parse_argv`: + // Plain executor (T13). All policy lives in approval-gate's rules + // layer; shell trusts its caller. `parse_argv`'s two-mode contract: // None → tokenize `command` via shell-words (single-string path) // Some(_) → use args verbatim, even if empty - // The typed-schema migration must NOT collapse "absent args" into - // "args: []" or callers lose the shell-words path. 
- let handler_id = "shell::exec"; - let argv = if let Some(ref marker) = req.from_approval { - marker_wellformed(marker)?; - let rec = fetch_approval_record(&iii, &marker.session_id, &marker.call_id).await?; - validate_approved_record_for_bypass(&rec, handler_id, &req.command, &req.args)?; - let argv = parse_argv(&req.command, req.args.as_ref()).map_err(|e| format!("argv: {}", e))?; - if let Some(reason) = cfg.denylist_hit_reason(&argv) { - tracing::error!( - reason = %reason, - "post-approval defense-in-depth: denylisted argv on approval bypass path" - ); - return Err(format!( - "post-approval defense-in-depth: {}", - reason - )); - } - argv - } else { - let argv = parse_argv(&req.command, req.args.as_ref()).map_err(|e| format!("argv: {}", e))?; - cfg.is_command_allowed(&argv)?; - argv - }; + let argv = parse_argv(&req.command, req.args.as_ref()) + .map_err(|e| format!("argv: {e}"))?; let timeout = cfg.resolve_timeout(req.timeout_ms); - let backend = pick_exec_backend(req.target, cfg, iii); - let out = backend.run(&argv, timeout).await.map_err(err_to_string)?; Ok(ExecResponse::from(out)) diff --git a/shell/src/functions/exec_bg.rs b/shell/src/functions/exec_bg.rs index cc864351..690871c9 100644 --- a/shell/src/functions/exec_bg.rs +++ b/shell/src/functions/exec_bg.rs @@ -1,74 +1,27 @@ use std::sync::Arc; -use iii_sdk::TriggerRequest; -use serde_json::{json, Value}; use uuid::Uuid; use crate::config::ShellConfig; use crate::exec::host::{build_command, parse_argv}; use crate::exec::sandbox::SandboxExecResponse; -use crate::functions::approval_bypass::{marker_wellformed, validate_approved_record_for_bypass}; use crate::functions::types::{ExecBgRequest, ExecBgResponse}; use crate::jobs::{self, JobHandle, JobRecord, JobStatus}; use crate::target::Target; use crate::triggers::{IiiTriggerFwd, TriggerFwd}; use tokio::io::AsyncReadExt; -const FN_APPROVAL_LOOKUP_RECORD: &str = "approval::lookup_record"; - -async fn fetch_approval_record(iii: &iii_sdk::III, session_id: &str, 
call_id: &str) -> Result { - let v = iii - .trigger(TriggerRequest { - function_id: FN_APPROVAL_LOOKUP_RECORD.into(), - payload: json!({ - "session_id": session_id, - "function_call_id": call_id, - }), - action: None, - timeout_ms: Some(10_000), - }) - .await - .map_err(|e| e.to_string())?; - if v.is_null() { - Err("__from_approval marker without valid pending approval record".into()) - } else { - Ok(v) - } -} - pub async fn handle( cfg: Arc, iii: iii_sdk::III, req: ExecBgRequest, ) -> Result { - // Field-level type errors (wrong-type `command`, non-string `args[i]`, - // bad `target.kind`) come from the per-field deserializers in - // `functions::types`; the SDK forwards them as the trigger `Err` with - // the actionable text the LLM needs to self-correct. - // See `functions::exec` — `args.as_ref()` preserves the shell-words - // tokenization contract when the caller omits `args`. - let handler_id = "shell::exec_bg"; - let argv = if let Some(ref marker) = req.from_approval { - marker_wellformed(marker)?; - let rec = fetch_approval_record(&iii, &marker.session_id, &marker.call_id).await?; - validate_approved_record_for_bypass(&rec, handler_id, &req.command, &req.args)?; - let argv = parse_argv(&req.command, req.args.as_ref()).map_err(|e| format!("argv: {}", e))?; - if let Some(reason) = cfg.denylist_hit_reason(&argv) { - tracing::error!( - reason = %reason, - "post-approval defense-in-depth: denylisted argv on approval bypass path" - ); - return Err(format!( - "post-approval defense-in-depth: {}", - reason - )); - } - argv - } else { - let argv = parse_argv(&req.command, req.args.as_ref()).map_err(|e| format!("argv: {}", e))?; - cfg.is_command_allowed(&argv)?; - argv - }; + // Plain executor (T13). All policy lives in approval-gate's rules + // layer; shell trusts its caller. 
`parse_argv`'s two-mode contract: + // None → tokenize `command` via shell-words + // Some(_) → use args verbatim + let argv = parse_argv(&req.command, req.args.as_ref()) + .map_err(|e| format!("argv: {e}"))?; match req.target { Target::Host => spawn_host_job(cfg, argv).await, @@ -367,7 +320,6 @@ mod sandbox_path_tests { max_output_bytes: 4096, ..Default::default() }; - c.compile_denylist().unwrap(); Arc::new(c) } diff --git a/shell/src/functions/mod.rs b/shell/src/functions/mod.rs index a5e2855e..9c86b7c2 100644 --- a/shell/src/functions/mod.rs +++ b/shell/src/functions/mod.rs @@ -1,5 +1,3 @@ -pub mod approval_bypass; -pub mod classify; pub mod exec; pub mod exec_bg; pub mod kill; diff --git a/shell/src/functions/types.rs b/shell/src/functions/types.rs index bc2f94ce..d0bbf1be 100644 --- a/shell/src/functions/types.rs +++ b/shell/src/functions/types.rs @@ -81,30 +81,17 @@ fn deserialize_timeout_ms<'de, D: Deserializer<'de>>(d: D) -> Result Ok(v.as_u64()) } -/// Marker injected by `approval-gate` when re-invoking after user approval. -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] -pub struct ApprovalMarker { - pub call_id: String, - pub session_id: String, -} - -/// Request body for `shell::classify_argv`. -#[derive(Debug, Deserialize, JsonSchema)] -pub struct ClassifyArgvRequest { - #[serde(deserialize_with = "deserialize_command")] - pub command: String, - #[serde(default, deserialize_with = "deserialize_args")] - pub args: Option>, -} +// `ApprovalMarker` + `ClassifyArgvRequest` deleted in T13: the +// approval-gate refactor moved all policy decisions to the rules layer. +// Shell is a plain executor; no `__from_approval` field on requests. /// Wire request for `shell::exec`. The schema is published to the engine's /// tool listing so callers see field types up front instead of guessing /// from the description. 
#[derive(Debug, Deserialize, JsonSchema)] pub struct ExecRequest { - /// Program name (matched against the allowlist by basename or exact path). - /// Must be a string — split arguments into `args`, do not pass argv as - /// an array here. + /// Program name. Must be a string — split arguments into `args`, do not + /// pass argv as an array here. #[serde(deserialize_with = "deserialize_command")] pub command: String, /// Arguments passed to the program, in order. Every element must be a @@ -115,42 +102,27 @@ pub struct ExecRequest { #[serde(default, deserialize_with = "deserialize_args")] pub args: Option>, /// Per-call timeout override, milliseconds. Capped at `cfg.max_timeout_ms`. - /// Negative or fractional values silently fall back to - /// `cfg.default_timeout_ms` (loose wire semantic, preserved on purpose). #[serde(default, deserialize_with = "deserialize_timeout_ms")] pub timeout_ms: Option, /// Where to run the command. Defaults to the host worker; pass /// `{ kind: "sandbox", sandbox_id }` to forward the call to a microVM. #[serde(default)] pub target: Target, - /// Present only on gate-driven re-invocation after approval (§ 6.4). - #[serde(default, rename = "__from_approval")] - pub from_approval: Option, } -/// Wire request for `shell::exec_bg`. Same shape as [`ExecRequest`]; documented -/// separately so the engine publishes a distinct schema per function. +/// Wire request for `shell::exec_bg`. Same shape as [`ExecRequest`]. #[derive(Debug, Deserialize, JsonSchema)] pub struct ExecBgRequest { - /// Program name. See [`ExecRequest::command`]. #[serde(deserialize_with = "deserialize_command")] pub command: String, - /// Arguments passed to the program. See [`ExecRequest::args`]. - /// `None` (or `args: null` / absent) means "tokenize `command` via - /// shell-words"; `Some(_)` (including the empty vec) means "use args - /// verbatim, no shell-words." See `parse_argv` in `crate::exec::host`. 
#[serde(default, deserialize_with = "deserialize_args")] pub args: Option>, - /// Per-call timeout. Host-targeted background jobs IGNORE `timeout_ms`; - /// sandbox-targeted ones forward it through `cfg.resolve_timeout`. + /// Host-targeted background jobs IGNORE `timeout_ms`; sandbox-targeted + /// ones forward it through `cfg.resolve_timeout`. #[serde(default, deserialize_with = "deserialize_timeout_ms")] pub timeout_ms: Option, - /// Where to run. See [`ExecRequest::target`]. #[serde(default)] pub target: Target, - /// Present only on gate-driven re-invocation after approval (§ 6.4). - #[serde(default, rename = "__from_approval")] - pub from_approval: Option, } #[derive(Debug, Serialize, JsonSchema)] diff --git a/shell/src/lib.rs b/shell/src/lib.rs index aa4a8629..1bf89713 100644 --- a/shell/src/lib.rs +++ b/shell/src/lib.rs @@ -2,7 +2,6 @@ //! under `tests/` can drive them at the public-API level. Both targets //! share source files via Cargo's two-target compile. -pub mod arity; pub mod config; pub mod exec; pub mod exec_dispatch; diff --git a/shell/src/main.rs b/shell/src/main.rs index c020f729..4f18665e 100644 --- a/shell/src/main.rs +++ b/shell/src/main.rs @@ -4,7 +4,6 @@ use iii_sdk::{register_worker, InitOptions, OtelConfig, RegisterFunction}; use serde_json::Value; use std::sync::Arc; -mod arity; mod config; mod exec; mod exec_dispatch; @@ -15,7 +14,7 @@ mod manifest; mod target; mod triggers; -use functions::types::{ClassifyArgvRequest, KillRequest, StatusRequest}; +use functions::types::{KillRequest, StatusRequest}; #[derive(Parser, Debug)] #[command(name = "shell", about = "Unix shell execution worker for iii agents")] @@ -50,8 +49,6 @@ async fn main() -> Result<()> { let shell_config = match config::load_config(&cli.config) { Ok(c) => { tracing::info!( - allowlist_size = c.allowlist.len(), - denylist_size = c.denylist_patterns.len(), max_timeout_ms = c.max_timeout_ms, max_concurrent = c.max_concurrent_jobs, "loaded config from {}", @@ -61,8 +58,7 @@ 
async fn main() -> Result<()> { } Err(e) => { tracing::warn!(error = %e, path = %cli.config, "failed to load config, using defaults"); - let mut c = config::ShellConfig::default(); - c.compile_denylist()?; + let c = config::ShellConfig::default(); // Defaults have host_root=None and allow_unjailed=false, so this // path refuses to start. Otherwise a missing config file would // silently bypass the S-H2 jail requirement. @@ -100,13 +96,14 @@ async fn main() -> Result<()> { }, ) .description( - "Run an allowlisted command in the foreground and return its \ - full output. Payload: { command: string (program name), \ - args?: string[], timeout_ms?: number, target?: { kind: \ - 'host'|'sandbox', sandbox_id?: string } }. Returns { stdout, \ - stderr, exit_code, duration_ms, timed_out, stdout_truncated, \ - stderr_truncated }. Do NOT pass argv as an array in 'command' \ - — split program and arguments across the two fields.", + "Run a command in the foreground and return its full output. \ + Policy lives in approval-gate's rules layer. Payload: { \ + command: string (program name), args?: string[], timeout_ms?: \ + number, target?: { kind: 'host'|'sandbox', sandbox_id?: \ + string } }. Returns { stdout, stderr, exit_code, \ + duration_ms, timed_out, stdout_truncated, stderr_truncated \ + }. Do NOT pass argv as an array in 'command' — split program \ + and arguments across the two fields.", ), ); } @@ -124,33 +121,18 @@ async fn main() -> Result<()> { }, ) .description( - "Spawn an allowlisted command as a background job. Same \ - payload shape as shell::exec; returns { job_id, argv } \ - immediately. Poll with shell::status, terminate with \ - shell::kill, list with shell::list. Do NOT pass argv as an \ - array in 'command' — use 'command' (string) + 'args' \ - (string[]).", + "Spawn a command as a background job. Same payload shape as \ + shell::exec; returns { job_id, argv } immediately. Poll with \ + shell::status, terminate with shell::kill, list with \ + shell::list. 
Do NOT pass argv as an array in 'command' — \ + use 'command' (string) + 'args' (string[]).", ), ); } - { - let cfg = shared.clone(); - iii.register_function( - RegisterFunction::new_async( - "shell::classify_argv", - move |req: ClassifyArgvRequest| { - let cfg = cfg.clone(); - async move { functions::classify::handle(cfg, req).await } - }, - ) - .description( - "Classify a shell argv for the approval gate (agent path). Returns \ - { decision: 'auto' | 'deny' | 'ask', ... } per shell policy — \ - not for direct agent use.", - ), - ); - } + // shell::classify_argv registration removed in T13. The approval-gate + // refactor moved all policy decisions to the rules layer; shell is a + // plain executor now. iii.register_function( RegisterFunction::new_async("shell::kill", |req: KillRequest| async move { diff --git a/shell/src/manifest.rs b/shell/src/manifest.rs index 0a134c6c..878baab6 100644 --- a/shell/src/manifest.rs +++ b/shell/src/manifest.rs @@ -14,10 +14,6 @@ pub fn build_manifest() -> Value { "id": "shell::exec_bg", "description": "Spawn a command in the background and return job_id", }, - { - "id": "shell::classify_argv", - "description": "Argv classifier for approval-gate (auto/deny/ask)", - }, { "id": "shell::kill", "description": "Kill a running background job", @@ -85,7 +81,7 @@ mod tests { assert!(m.get("version").is_some()); assert!(m.get("functions").is_some()); let fns = m.get("functions").unwrap().as_array().unwrap(); - assert_eq!(fns.len(), 16); + assert_eq!(fns.len(), 15); } #[test] diff --git a/shell/tests/function_handlers.rs b/shell/tests/function_handlers.rs index 945941f1..bdc022e4 100644 --- a/shell/tests/function_handlers.rs +++ b/shell/tests/function_handlers.rs @@ -17,15 +17,16 @@ async fn seed(handle: JobHandle) -> String { } } -fn cfg_with_allow(allow: &[&str]) -> Arc { - let mut c = ShellConfig { - allowlist: allow.iter().map(|s| s.to_string()).collect(), +/// Build a test config. 
`_allow` is ignored after T13 (shell no longer +/// consults an allowlist — policy lives in approval-gate). The parameter +/// stays for call-site source compatibility until the tests are pruned. +fn cfg_with_allow(_allow: &[&str]) -> Arc { + let c = ShellConfig { max_timeout_ms: 5000, default_timeout_ms: 1500, max_output_bytes: 4096, ..Default::default() }; - c.compile_denylist().unwrap(); Arc::new(c) } @@ -101,18 +102,12 @@ fn exec_request_rejects_array_command_with_helpful_error() { ); } -#[tokio::test] -async fn exec_handler_rejects_unlisted_command() { - let cfg = cfg_with_allow(&["echo"]); - let err = functions::exec::handle( - cfg, - fresh_iii(), - typed::(json!({"command": "nmap", "args": ["-v"]})), - ) - .await - .unwrap_err(); - assert!(err.contains("allowlist")); -} +// `exec_handler_rejects_unlisted_command` deleted in T13 — shell no +// longer enforces an allowlist; policy lives in approval-gate's rules +// layer. The equivalent assertion under the new model is that an +// `Action::Deny` rule for `shell::exec` + matching pattern returns a +// structured Denial::Policy at intercept time (see approval-gate's +// intercept tests). /// `args[i]` validation is per-index; a non-string element must be rejected /// with a message that names which index failed and what it actually was. From 40ad326700112e978109d06c4e7076f38f0480cb Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 09:15:37 -0300 Subject: [PATCH 26/30] feat(turn-orchestrator): switch stitch to approval::consume (T14) consume_approval_stitch now calls approval::consume (single RPC, returns + deletes in one shot). Payload is { session_id } only; no turn_id, no limit (gate enforces a default cap of 50, surfacing overflow via the 'omitted' counter as before). stitch_entries rewritten for the new Record wire shape: reads nested outcome: { kind, detail } instead of the old top-level status + result/error/denial. Outcome kinds: executed, failed, denied, timed_out. 
function_call_id is the canonical key (call_id retained as a legacy fallback for old rows on disk during the upgrade). render_denial_lines's policy branch reads rule_permission/rule_pattern (matching the renamed Denial::Policy detail shape from T2). The 'policy denied by classifier_fn: classifier_reason' wording becomes 'policy denied by rule <rule_permission> : <rule_pattern>'. omission_summary_message reworded: 'approval::flush_delivered' is gone; the natural recovery path is the next-turn consume, which is what the new message points at. 131 lib tests pass (was 122 + 9 failing from old-shape assertions). Pre-existing integration test failure (tests/run_stop.rs imports run_stop module that isn't declared in lib.rs) is unrelated to this refactor. --- .../src/states/approval_stitching.rs | 139 +++++++++++------- turn-orchestrator/src/states/assistant.rs | 22 +-- 2 files changed, 97 insertions(+), 64 deletions(-) diff --git a/turn-orchestrator/src/states/approval_stitching.rs b/turn-orchestrator/src/states/approval_stitching.rs index 848c274b..4d48e9f2 100644 --- a/turn-orchestrator/src/states/approval_stitching.rs +++ b/turn-orchestrator/src/states/approval_stitching.rs @@ -25,26 +25,25 @@ pub fn stitch_entries(entries: &[Value]) -> Vec { entries.iter().map(stitch_one).collect() } -/// Build a one-line user-message string warning the model that the cap on -/// `approval::consume_undelivered` left N entries behind. Returns `None` -/// when `omitted == 0` so the orchestrator can skip emitting anything. +/// Build a one-line user-message string warning the model that +/// `approval::consume` capped the response and left N entries behind. +/// Returns `None` when `omitted == 0`. After T14 the recovery path is +/// the natural next-turn consume (no flush_delivered RPC anymore). pub fn omission_summary_message(omitted: u64) -> Option { if omitted == 0 { return None; } Some(format!( - "[approval-gate] {omitted} older resolved approval record(s) were \ - omitted from this turn (oldest-first cap). 
They remain undelivered and \ - will surface on later turns. To drain them in one shot, trigger \ - approval::flush_delivered." + "[approval-gate] {omitted} more resolved approvals waiting to stitch — \ + they'll surface on the next turn." )) } /// Render an approval-gate `Denial` (tagged `{ kind, detail }`) as one or /// more indented lines for the stitched LLM message. Each kind gets a /// shape it can act on: -/// - `policy` → "policy denied by : " (rule-based deny; -/// tells the model what rule fired so it can adapt) +/// - `policy` → "policy denied by rule <rule_permission> : <rule_pattern>" (rule-based deny; +/// names the matched rule so the model knows what to avoid) /// - `user_rejected` → "user rejected this call" (no feedback) /// - `user_corrected` → "user rejected with feedback: " /// (the high-value variant — model gets actionable correction) @@ -55,15 +54,15 @@ fn render_denial_lines(denial: &Value) -> Vec { let detail = denial.get("detail").cloned().unwrap_or(Value::Null); match kind { "policy" => { - let reason = detail - .get("classifier_reason") + let perm = detail + .get("rule_permission") .and_then(Value::as_str) .unwrap_or(""); - let f = detail - .get("classifier_fn") + let pat = detail + .get("rule_pattern") .and_then(Value::as_str) .unwrap_or(""); - vec![format!(" policy denied by {f}: {reason}")] + vec![format!(" policy denied by rule {perm} : {pat}")] } "user_rejected" => vec![" user rejected this call".to_string()], "user_corrected" => { @@ -81,17 +80,30 @@ fn render_denial_lines(denial: &Value) -> Vec { } } +/// Render one entry from `approval::consume`. Record shape after T1/T8: +/// `{ function_call_id, function_id, args, session_id, expires_at, +/// status: "done", outcome: { kind, detail }, resolved_at }` +/// `outcome.kind` is `"executed" | "failed" | "denied" | "timed_out"`. 
fn stitch_one(entry: &Value) -> String { - let call_id = entry.get("call_id").and_then(Value::as_str).unwrap_or("?"); + let call_id = entry + .get("function_call_id") + .or_else(|| entry.get("call_id")) // legacy fallback + .and_then(Value::as_str) + .unwrap_or("?"); let fn_id = entry.get("function_id").and_then(Value::as_str).unwrap_or("?"); - let status = entry.get("status").and_then(Value::as_str).unwrap_or("?"); - let decision = match status { + + let outcome = entry.get("outcome").cloned().unwrap_or(Value::Null); + let outcome_kind = outcome.get("kind").and_then(Value::as_str).unwrap_or("?"); + let detail = outcome.get("detail").cloned().unwrap_or(Value::Null); + + let decision = match outcome_kind { "executed" | "failed" => "allow", "denied" => "deny", "timed_out" => "timeout", _ => "?", }; - let args_json = entry.get("args") + let args_json = entry + .get("args") .map(|v| serde_json::to_string(v).unwrap_or_default()) .unwrap_or_default(); let args = truncate_for_message(&args_json, STITCH_MAX_CHARS); @@ -99,23 +111,30 @@ fn stitch_one(entry: &Value) -> String { let mut lines = vec![ format!("[approval-gate] Earlier call_id {call_id} (function_id={fn_id}, args={args}):"), format!(" decision: {decision}"), - format!(" status: {status}"), + format!(" outcome: {outcome_kind}"), ]; - if status == "executed" { - if let Some(r) = entry.get("result") { - let r_json = serde_json::to_string(r).unwrap_or_default(); - lines.push(format!(" result: {}", truncate_for_message(&r_json, STITCH_MAX_CHARS))); + match outcome_kind { + "executed" => { + if let Some(r) = detail.get("result") { + let r_json = serde_json::to_string(r).unwrap_or_default(); + lines.push(format!( + " result: {}", + truncate_for_message(&r_json, STITCH_MAX_CHARS) + )); + } } - } - if status == "failed" { - if let Some(e) = entry.get("error").and_then(Value::as_str) { - lines.push(format!(" error: {e}")); + "failed" => { + if let Some(e) = detail.get("error").and_then(Value::as_str) { + 
lines.push(format!(" error: {e}")); + } } - } - if status == "denied" { - if let Some(denial) = entry.get("denial") { - lines.extend(render_denial_lines(denial)); + "denied" => { + if let Some(denial) = detail.get("denial") { + lines.extend(render_denial_lines(denial)); + } } + "timed_out" => { /* self-describing */ } + _ => {} } lines.join("\n") } @@ -147,19 +166,33 @@ mod tests { assert!(out.contains("… (truncated)")); } - fn make_entry(call_id: &str, fn_id: &str, status: &str, extras: Value) -> Value { - let mut v = json!({ - "call_id": call_id, + /// Build a wire-shape entry as `approval::consume` returns it: typed + /// Record with nested `outcome: { kind, detail }`. `old_status` maps to + /// the matching `outcome.kind` and the top-level extras are folded + /// under `outcome.detail` where the new shape expects them. + fn make_entry(call_id: &str, fn_id: &str, old_status: &str, extras: Value) -> Value { + // Pull args override out of extras if present (top-level on the + // Record), and fold the remaining keys into outcome.detail. 
+ let mut extras_obj = match extras { + Value::Object(m) => m, + _ => serde_json::Map::new(), + }; + let args_override = extras_obj.remove("args"); + let detail: Value = if extras_obj.is_empty() { + Value::Null + } else { + Value::Object(extras_obj) + }; + let kind = old_status; // executed | failed | denied | timed_out + json!({ + "function_call_id": call_id, "function_id": fn_id, - "args": {"path": "/tmp/x"}, - "status": status, - }); - if let Value::Object(extras) = extras { - for (k, val) in extras { - v[k] = val; - } - } - v + "args": args_override.unwrap_or_else(|| json!({"path": "/tmp/x"})), + "session_id": "sess_test", + "expires_at": u64::MAX, + "status": "done", + "outcome": { "kind": kind, "detail": detail }, + }) } #[test] @@ -183,7 +216,7 @@ mod tests { json!({"result": {"ok": true}}))]; let msg = &stitch_entries(&entries)[0]; assert!(msg.contains("decision: allow")); - assert!(msg.contains("status: executed")); + assert!(msg.contains("outcome: executed")); assert!(msg.contains("result:")); assert!(msg.contains("c1")); assert!(!msg.contains("error:")); @@ -195,7 +228,7 @@ mod tests { json!({"error": "EACCES"}))]; let msg = &stitch_entries(&entries)[0]; assert!(msg.contains("decision: allow")); - assert!(msg.contains("status: failed")); + assert!(msg.contains("outcome: failed")); assert!(msg.contains("error: EACCES")); assert!(!msg.contains("result:")); } @@ -210,7 +243,7 @@ mod tests { )]; let msg = &stitch_entries(&entries)[0]; assert!(msg.contains("decision: deny")); - assert!(msg.contains("status: denied")); + assert!(msg.contains("outcome: denied")); assert!(msg.contains("user rejected this call")); assert!(!msg.contains("result:")); assert!(!msg.contains("error:")); @@ -234,7 +267,7 @@ mod tests { } #[test] - fn stitch_entries_denied_policy_names_classifier_and_reason() { + fn stitch_entries_denied_policy_names_matching_rule() { let entries = vec![make_entry( "c1", "shell::fs::write", @@ -243,14 +276,14 @@ mod tests { "denial": { "kind": "policy", 
"detail": { - "classifier_reason": "command matches denylist", - "classifier_fn": "shell::classify_argv" + "rule_permission": "shell::exec", + "rule_pattern": "rm -rf*" } } }), )]; let msg = &stitch_entries(&entries)[0]; - assert!(msg.contains("policy denied by shell::classify_argv: command matches denylist")); + assert!(msg.contains("policy denied by rule shell::exec : rm -rf*")); } #[test] @@ -282,7 +315,7 @@ mod tests { )]; let msg = &stitch_entries(&entries)[0]; assert!(msg.contains("decision: timeout")); - assert!(msg.contains("status: timed_out")); + assert!(msg.contains("outcome: timed_out")); // Timed-out records carry no denial; the status line is enough. assert!(!msg.contains("user rejected")); assert!(!msg.contains("policy denied")); @@ -327,10 +360,10 @@ mod tests { } #[test] - fn omission_summary_message_positive_mentions_count_and_advises_flush() { + fn omission_summary_message_positive_mentions_count_and_next_turn() { let msg = omission_summary_message(42).expect("expected Some"); assert!(msg.starts_with("[approval-gate]")); assert!(msg.contains("42")); - assert!(msg.contains("approval::flush_delivered")); + assert!(msg.contains("next turn")); } } diff --git a/turn-orchestrator/src/states/assistant.rs b/turn-orchestrator/src/states/assistant.rs index cf907497..518e9de0 100644 --- a/turn-orchestrator/src/states/assistant.rs +++ b/turn-orchestrator/src/states/assistant.rs @@ -75,25 +75,26 @@ pub async fn handle_awaiting(iii: &III, record: &mut TurnStateRecord) -> anyhow: Ok(()) } -/// Atomically consume resolved-but-undelivered approvals (list + stamp) and +/// Drain resolved approvals from the gate via `approval::consume` and /// convert them into stitched user messages plus an optional omission /// summary. Returns `(stitched_msgs, summary_msg_or_none)`. /// +/// `approval::consume` is atomic: it returns Done rows and deletes them +/// in the same call. 
No turn_id stamping (delivered_in_turn_id is gone); +/// rows beyond the gate's cap stay in state and surface on the next turn, +/// counted via `omitted`. +/// /// Network failures are logged and swallowed: a transient consume failure /// must not block the turn. On failure the caller proceeds with an empty -/// stitch — entries will be retried atomically on the next turn. +/// stitch — entries will be retried on the next turn. pub(crate) async fn consume_approval_stitch( iii: &III, session_id: &str, - turn_id: &str, ) -> (Vec, Option) { let resp = iii .trigger(TriggerRequest { - function_id: "approval::consume_undelivered".into(), - payload: json!({ - "session_id": session_id, - "turn_id": turn_id, - }), + function_id: "approval::consume".into(), + payload: json!({ "session_id": session_id }), action: None, timeout_ms: Some(5_000), }) @@ -101,7 +102,7 @@ pub(crate) async fn consume_approval_stitch( let resp = match resp { Ok(v) => v, Err(e) => { - tracing::warn!(error = %e, "approval::consume_undelivered failed; skipping stitch this turn"); + tracing::warn!(error = %e, "approval::consume failed; skipping stitch this turn"); return (Vec::new(), None); } }; @@ -176,9 +177,8 @@ pub(crate) fn append_summary_message( pub async fn handle_streaming(iii: &III, record: &mut TurnStateRecord) -> anyhow::Result<()> { let request = persistence::load_run_request(iii, &record.session_id).await; let mut messages = persistence::load_messages(iii, &record.session_id).await; - let turn_id = format!("{}-turn-{}", record.session_id, record.turn_count); let (stitch_msgs, summary) = - consume_approval_stitch(iii, &record.session_id, &turn_id).await; + consume_approval_stitch(iii, &record.session_id).await; let stitched_nonempty = !stitch_msgs.is_empty() || summary.is_some(); merge_stitched_into_history(&mut messages, stitch_msgs); append_summary_message( From 80a8ecc299070c8471652c2402633c9fd80328ee Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 09:19:16 -0300 
Subject: [PATCH 27/30] test(approval-gate): E2E lifecycle tests + delete dead integration tests (T17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete 8 integration test files whose assertions targeted the deleted surface (FN_LIST_UNDELIVERED, FN_ACK_DELIVERED, transition_record, InterceptAction, marker plumbing, ...): - tests/approval_lifecycle.rs (engine-backed, used deleted RPCs) - tests/delivery.rs (old delivery RPC surface) - tests/integration.rs (old subscriber wiring) - tests/intercept.rs (decide_intercept_action, classifier path) - tests/lifecycle.rs (record-helper tests on deleted schema) - tests/misc.rs (mixed assertions on old shape) - tests/resolve.rs (old transition_record / Approved status) - tests/state_machine.rs (proptest dep + old schema) The src/* unit tests added across T1-T12 cover the equivalent surface for the new wire shape (Pending|InFlight|Done, Outcome enum, Denial::Policy {rule_permission, rule_pattern}, etc.). Add tests/lifecycle.rs with FIVE E2E lifecycle tests against the in-memory StateBus + FakeExecutor — the integration safety net for the whole refactor: 1. allow_path_end_to_end — pending → InFlight → executed → consume drains 2. deny_path_with_user_corrected_feedback_end_to_end — feedback round-trips 3. timeout_path_lazy_flips_on_consume_end_to_end — past expires_at flips on read 4. cascade_path_end_to_end — two pending rows; allow+always pushes exact-pattern rule; second auto-resolves; consume drains both; pinned: pushed pattern is the originator's argv ('echo go'), NOT blanket '*' 5. allow_rule_short_circuits_with_no_state_write — bonus: Verdict::Allow doesn't touch state 72 approval-gate tests pass across 6 suites (lib + wire + manifest + lifecycle + 2 from common helpers). Net deletion: ~2500 lines of old-shape integration tests gone, ~250 lines of new E2E in their place. 
--- approval-gate/tests/approval_lifecycle.rs | 332 ---------- approval-gate/tests/delivery.rs | 729 ---------------------- approval-gate/tests/integration.rs | 150 ----- approval-gate/tests/intercept.rs | 218 ------- approval-gate/tests/lifecycle.rs | 507 +++++++-------- approval-gate/tests/misc.rs | 87 --- approval-gate/tests/resolve.rs | 673 -------------------- approval-gate/tests/state_machine.rs | 209 ------- 8 files changed, 240 insertions(+), 2665 deletions(-) delete mode 100644 approval-gate/tests/approval_lifecycle.rs delete mode 100644 approval-gate/tests/delivery.rs delete mode 100644 approval-gate/tests/integration.rs delete mode 100644 approval-gate/tests/intercept.rs delete mode 100644 approval-gate/tests/misc.rs delete mode 100644 approval-gate/tests/resolve.rs delete mode 100644 approval-gate/tests/state_machine.rs diff --git a/approval-gate/tests/approval_lifecycle.rs b/approval-gate/tests/approval_lifecycle.rs deleted file mode 100644 index d6d6461d..00000000 --- a/approval-gate/tests/approval_lifecycle.rs +++ /dev/null @@ -1,332 +0,0 @@ -//! End-to-end approval lifecycle: register a fake gated function, intercept -//! a call, resolve it, drive a synthetic next turn, and assert the stitched -//! system message reaches the message log. Skips cleanly when no engine. 
- -use std::time::Duration; - -use approval_gate::{ - register, WorkerConfig, FN_ACK_DELIVERED, FN_LIST_UNDELIVERED, FN_RESOLVE, STATE_SCOPE, -}; -use iii_sdk::{register_worker, IIIError, InitOptions, RegisterFunctionMessage, TriggerRequest}; -use serde_json::{json, Value}; - -const DEFAULT_ENGINE_URL: &str = "ws://127.0.0.1:49134"; -const ENGINE_PROBE_TIMEOUT_MS: u64 = 500; - -async fn skip_if_no_engine(url: &str) -> Option { - let iii = register_worker(url, InitOptions::default()); - let probe = iii - .trigger(TriggerRequest { - function_id: "state::get".into(), - payload: json!({ "scope": STATE_SCOPE, "key": "__probe__" }), - action: None, - timeout_ms: Some(ENGINE_PROBE_TIMEOUT_MS), - }) - .await; - if probe.is_err() { - eprintln!("skipping: no engine at {url}"); - return None; - } - Some(iii) -} - -#[tokio::test] -async fn allow_path_executes_function_and_stitches_into_next_turn() { - let url = std::env::var("III_URL").unwrap_or_else(|_| DEFAULT_ENGINE_URL.to_string()); - let Some(iii) = skip_if_no_engine(&url).await else { - return; - }; - - let nonce = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let session_id = format!("turn-orch-it-{nonce}"); - let function_call_id = format!("tc-{nonce}"); - let topic = format!("agent::before_function_call::ito_{nonce}"); - - let target_calls: std::sync::Arc>> = - std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); - let target_calls_for_handler = target_calls.clone(); - let _target = iii.register_function(( - RegisterFunctionMessage::with_id(format!("test::write_{nonce}")) - .with_description("fake write".into()), - move |payload: Value| { - let log = target_calls_for_handler.clone(); - async move { - log.lock().unwrap().push(payload); - Ok::<_, IIIError>(json!({"ok": true, "bytes": 42})) - } - }, - )); - - let _refs = register( - &iii, - &WorkerConfig { - topic: topic.clone(), - default_timeout_ms: 30_000, - ..WorkerConfig::default() - }, - ) - 
.expect("register approval-gate"); - - let target_fn = format!("test::write_{nonce}"); - let envelope = json!({ - "event_id": format!("evt-{nonce}"), - "reply_stream": format!("rs-{nonce}"), - "payload": { - "session_id": session_id, - "function_call": { - "id": function_call_id, - "function_id": target_fn, - "arguments": {"path": "/tmp/foo"}, - }, - "approval_required": [target_fn.clone()], - } - }); - let intercept_resp = iii - .trigger(TriggerRequest { - function_id: "policy::approval_gate".into(), - payload: envelope, - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("intercept ok"); - - assert_eq!(intercept_resp["block"], json!(true)); - assert_eq!(intercept_resp["status"], json!("pending")); - assert!( - target_calls.lock().unwrap().is_empty(), - "function ran before approval" - ); - - let resolve_resp = iii - .trigger(TriggerRequest { - function_id: FN_RESOLVE.into(), - payload: json!({ - "session_id": session_id, - "function_call_id": function_call_id, - "decision": "allow", - }), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("resolve ok"); - assert_eq!(resolve_resp["ok"], json!(true)); - - tokio::time::sleep(Duration::from_millis(50)).await; - let calls = target_calls.lock().unwrap().clone(); - assert_eq!(calls.len(), 1, "expected one invocation; got {calls:?}"); - assert_eq!(calls[0]["path"], json!("/tmp/foo")); - - let undelivered = iii - .trigger(TriggerRequest { - function_id: FN_LIST_UNDELIVERED.into(), - payload: json!({"session_id": session_id}), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("list_undelivered ok"); - let entries = undelivered["entries"].as_array().expect("entries array"); - let our_entry = entries - .iter() - .find(|e| e["function_call_id"] == function_call_id) - .expect("our entry in undelivered list"); - assert_eq!(our_entry["status"], "executed"); - assert_eq!(our_entry["result"], json!({"ok": true, "bytes": 42})); - - let ack = iii - .trigger(TriggerRequest { - 
function_id: FN_ACK_DELIVERED.into(), - payload: json!({ - "session_id": session_id, - "call_ids": [function_call_id.clone()], - "turn_id": "turn-1", - }), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("ack ok"); - assert_eq!(ack["ok"], json!(true)); - assert_eq!(ack["stamped"], json!(1)); - - let after = iii - .trigger(TriggerRequest { - function_id: FN_LIST_UNDELIVERED.into(), - payload: json!({"session_id": session_id}), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("ok"); - let after_entries = after["entries"].as_array().unwrap(); - assert!( - after_entries - .iter() - .all(|e| e["function_call_id"] != function_call_id), - "after ack, our entry must not be in undelivered list" - ); -} - -#[tokio::test] -async fn deny_path_does_not_invoke_function_and_stitches_denied() { - let url = std::env::var("III_URL").unwrap_or_else(|_| DEFAULT_ENGINE_URL.to_string()); - let Some(iii) = skip_if_no_engine(&url).await else { - return; - }; - - let nonce = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let session_id = format!("turn-orch-deny-{nonce}"); - let function_call_id = format!("tc-deny-{nonce}"); - let topic = format!("agent::before_function_call::itd_{nonce}"); - - let target_calls: std::sync::Arc>> = - std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); - let log = target_calls.clone(); - let _target = iii.register_function(( - RegisterFunctionMessage::with_id(format!("test::write_d_{nonce}")) - .with_description("fake write".into()), - move |payload: Value| { - let log = log.clone(); - async move { - log.lock().unwrap().push(payload); - Ok::<_, IIIError>(json!({"ok": true})) - } - }, - )); - - let _refs = register( - &iii, - &WorkerConfig { - topic: topic.clone(), - default_timeout_ms: 30_000, - ..WorkerConfig::default() - }, - ) - .expect("register approval-gate"); - - let target_fn = format!("test::write_d_{nonce}"); - iii.trigger(TriggerRequest { - 
function_id: "policy::approval_gate".into(), - payload: json!({ - "event_id": format!("evt-{nonce}"), - "reply_stream": format!("rs-{nonce}"), - "payload": { - "session_id": session_id, - "function_call": {"id": function_call_id, "function_id": target_fn, "arguments": {}}, - "approval_required": [target_fn.clone()], - } - }), - action: None, timeout_ms: Some(5_000), - }).await.expect("intercept"); - - iii.trigger(TriggerRequest { - function_id: FN_RESOLVE.into(), - payload: json!({ - "session_id": session_id, - "function_call_id": function_call_id, - "decision": "deny", - "reason": "test-deny", - }), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("resolve deny"); - - tokio::time::sleep(Duration::from_millis(50)).await; - assert!( - target_calls.lock().unwrap().is_empty(), - "function must not be invoked on deny" - ); - - let undelivered = iii - .trigger(TriggerRequest { - function_id: FN_LIST_UNDELIVERED.into(), - payload: json!({"session_id": session_id}), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("ok"); - let entries = undelivered["entries"].as_array().unwrap(); - let our_entry = entries - .iter() - .find(|e| e["function_call_id"] == function_call_id) - .expect("our entry in undelivered list"); - assert_eq!(our_entry["status"], "denied"); - assert_eq!(our_entry["decision_reason"], "test-deny"); -} - -#[tokio::test] -async fn timeout_path_lazy_flips_pending_to_timed_out_on_read() { - let url = std::env::var("III_URL").unwrap_or_else(|_| DEFAULT_ENGINE_URL.to_string()); - let Some(iii) = skip_if_no_engine(&url).await else { - return; - }; - - let nonce = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let session_id = format!("turn-orch-to-{nonce}"); - let function_call_id = format!("tc-to-{nonce}"); - let topic = format!("agent::before_function_call::itt_{nonce}"); - - let _target = iii.register_function(( - 
RegisterFunctionMessage::with_id(format!("test::write_t_{nonce}")) - .with_description("fake".into()), - move |_p: Value| async move { Ok::<_, IIIError>(json!({"ok": true})) }, - )); - - let _refs = register( - &iii, - &WorkerConfig { - topic: topic.clone(), - default_timeout_ms: 100, - ..WorkerConfig::default() - }, - ) - .expect("register approval-gate"); - - let target_fn = format!("test::write_t_{nonce}"); - iii.trigger(TriggerRequest { - function_id: "policy::approval_gate".into(), - payload: json!({ - "event_id": format!("evt-{nonce}"), - "reply_stream": format!("rs-{nonce}"), - "payload": { - "session_id": session_id, - "function_call": {"id": function_call_id, "function_id": target_fn, "arguments": {}}, - "approval_required": [target_fn.clone()], - } - }), - action: None, timeout_ms: Some(5_000), - }).await.expect("intercept"); - - tokio::time::sleep(Duration::from_millis(200)).await; - - let undelivered = iii - .trigger(TriggerRequest { - function_id: FN_LIST_UNDELIVERED.into(), - payload: json!({"session_id": session_id}), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("ok"); - let entries = undelivered["entries"].as_array().unwrap(); - let our_entry = entries - .iter() - .find(|e| e["function_call_id"] == function_call_id) - .expect("expired pending must lazy-flip to timed_out and surface"); - assert_eq!(our_entry["status"], "timed_out"); - assert_eq!(our_entry["decision_reason"], "timeout"); -} diff --git a/approval-gate/tests/delivery.rs b/approval-gate/tests/delivery.rs deleted file mode 100644 index 9ed3ca4c..00000000 --- a/approval-gate/tests/delivery.rs +++ /dev/null @@ -1,729 +0,0 @@ -//! Delivery-tracking handlers: list_pending, list_undelivered, -//! ack_delivered, consume_undelivered, flush_delivered, sweep_session. 
- -mod common; - -use approval_gate::*; -use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; -use serde_json::{json, Value}; -use std::sync::Mutex; - - - - #[tokio::test] - async fn handle_list_undelivered_caps_at_default_limit_and_reports_omitted() { - let bus = InMemoryStateBus::new(); - for i in 0..75 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 50); - assert_eq!(resp["omitted"].as_u64(), Some(25)); - } - - - #[tokio::test] - async fn handle_list_undelivered_honors_explicit_limit() { - let bus = InMemoryStateBus::new(); - for i in 0..10 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_list_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "limit": 3}), - 100_000, - ) - .await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 3); - assert_eq!(resp["omitted"].as_u64(), Some(7)); - } - - - #[tokio::test] - async fn handle_list_undelivered_returns_oldest_first_by_resolved_at() { - let bus = InMemoryStateBus::new(); - for (i, ts) in [(0_u32, 5_000_u64), (1, 1_000), (2, 3_000)] { - let cid = format!("c{i}"); - let 
mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ts, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_list_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "limit": 10}), - 100_000, - ) - .await; - let entries = resp["entries"].as_array().unwrap(); - let ids: Vec<&str> = entries - .iter() - .map(|e| e["function_call_id"].as_str().unwrap()) - .collect(); - assert_eq!(ids, vec!["c1", "c2", "c0"]); - } - - - #[tokio::test] - async fn handle_list_undelivered_omitted_is_zero_when_under_limit() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record_with_now( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_500, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 1); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - } - - - #[tokio::test] - async fn handle_consume_undelivered_stamps_returned_entries() { - let bus = InMemoryStateBus::new(); - for i in 0..3 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_consume_undelivered( 
- &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "turn-7", "limit": 10}), - 100_000, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["entries"].as_array().unwrap().len(), 3); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(next["entries"].as_array().unwrap().len(), 0); - } - - - #[tokio::test] - async fn handle_consume_undelivered_respects_limit_and_leaves_remainder() { - let bus = InMemoryStateBus::new(); - for i in 0..5 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - &build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_consume_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "turn-7", "limit": 2}), - 100_000, - ) - .await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 2); - assert_eq!(resp["omitted"].as_u64(), Some(3)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(next["entries"].as_array().unwrap().len(), 3); - } - - - #[tokio::test] - async fn handle_consume_undelivered_missing_turn_id_returns_error() { - let bus = InMemoryStateBus::new(); - let resp = handle_consume_undelivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1"}), - 100_000, - ) - .await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], json!("missing_turn_id")); - } - - - #[tokio::test] - async fn handle_flush_delivered_stamps_all_unacked_terminals() { - let bus = InMemoryStateBus::new(); - for i in 0..5 { - let cid = format!("c{i}"); - let mut rec = transition_record_with_now( - 
&build_pending_record(&cid, "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_000 + i as u64, - ); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", &cid), rec) - .await - .unwrap(); - } - let resp = handle_flush_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"].as_u64(), Some(5)); - let next = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - assert_eq!(next["entries"].as_array().unwrap().len(), 0); - } - - - #[tokio::test] - async fn handle_flush_delivered_skips_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - let resp = handle_flush_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["stamped"].as_u64(), Some(0)); - let still = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(still["status"].as_str(), Some("pending")); - assert!(still.get("delivered_in_turn_id").is_none()); - } - - - #[tokio::test] - async fn handle_flush_delivered_idempotent_on_already_stamped() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record_with_now( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - 1_500, - ); - { - let obj = rec.as_object_mut().unwrap(); - obj.insert( - "delivered_in_turn_id".into(), - Value::String("turn-prev".into()), - ); - obj.insert("session_id".into(), Value::String("s1".into())); - } - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - let resp = 
handle_flush_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "turn_id": "manual-flush"}), - ) - .await; - assert_eq!(resp["stamped"].as_u64(), Some(0)); - let still = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(still["delivered_in_turn_id"].as_str(), Some("turn-prev")); - } - - - #[tokio::test] - async fn handle_list_undelivered_returns_terminal_records_with_no_delivered_stamp() { - let bus = InMemoryStateBus::new(); - let mut r1 = transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - r1.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), r1) - .await - .unwrap(); - let mut r2 = transition_record( - &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), - "denied", - None, - None, - Some(Denial::UserCorrected { - feedback: "nope".into(), - }), - ); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - let entries = resp["entries"].as_array().unwrap(); - assert_eq!(entries.len(), 2); - assert_eq!(resp["omitted"].as_u64(), Some(0)); - } - - - #[tokio::test] - async fn handle_list_undelivered_excludes_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; - assert_eq!(resp["entries"].as_array().unwrap().len(), 0); - } - - - #[tokio::test] - async fn handle_list_undelivered_empty_session_returns_empty() { - let bus = 
InMemoryStateBus::new(); - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 1_500).await; - assert_eq!(resp["entries"], json!([])); - } - - - #[tokio::test] - async fn handle_list_undelivered_excludes_records_stamped_with_delivered_turn_id() { - let bus = InMemoryStateBus::new(); - let mut rec = transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - { - let obj = rec.as_object_mut().unwrap(); - obj.insert( - "delivered_in_turn_id".into(), - Value::String("turn-prev".into()), - ); - obj.insert("session_id".into(), Value::String("s1".into())); - } - bus.set(STATE_SCOPE, &pending_key("s1", "c1"), rec) - .await - .unwrap(); - - let mut r2 = transition_record( - &build_pending_record("c2", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), Value::String("s1".into())); - bus.set(STATE_SCOPE, &pending_key("s1", "c2"), r2) - .await - .unwrap(); - - let resp = - handle_list_undelivered(&bus, STATE_SCOPE, json!({"session_id": "s1"}), 100_000).await; - let entries = resp["entries"].as_array().unwrap(); - assert_eq!(entries.len(), 1); - assert_eq!(entries[0]["function_call_id"], "c2"); - } - - - #[tokio::test] - async fn handle_list_undelivered_returns_empty_when_session_id_missing() { - let bus = InMemoryStateBus::new(); - let resp = handle_list_undelivered(&bus, STATE_SCOPE, json!({}), 1_500).await; - assert_eq!(resp["entries"], json!([])); - } - - - #[tokio::test] - async fn handle_ack_delivered_stamps_records_with_turn_id() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let 
resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", - "call_ids": ["c1"], - "turn_id": "turn-1", - }), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"], json!(1)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["delivered_in_turn_id"], "turn-1"); - } - - - #[tokio::test] - async fn handle_ack_delivered_is_idempotent_keeps_first_turn_id() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let _ = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-first", - }), - ) - .await; - let resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["c1"], "turn_id": "turn-second", - }), - ) - .await; - assert_eq!(resp["stamped"], json!(0), "second ack must not re-stamp"); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["delivered_in_turn_id"], "turn-first"); - } - - - #[tokio::test] - async fn handle_ack_delivered_skips_unknown_call_ids_silently() { - let bus = InMemoryStateBus::new(); - let resp = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({ - "session_id": "s1", "call_ids": ["ghost"], "turn_id": "turn-1", - }), - ) - .await; - assert_eq!(resp["ok"], json!(true)); - assert_eq!(resp["stamped"], json!(0)); - } - - - #[tokio::test] - async fn list_pending_returns_only_pending_for_session() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - let mut resolved = build_pending_record("tc-2", "write", &json!({}), 0, 60_000); - 
resolved["status"] = json!("allow"); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), resolved) - .await - .unwrap(); - bus.set( - STATE_SCOPE, - &pending_key("other", "tc-3"), - build_pending_record("tc-3", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let out = handle_list_pending(&bus, STATE_SCOPE, json!({ "session_id": "s1" })).await; - let items = out["pending"].as_array().unwrap(); - assert_eq!(items.len(), 1); - assert_eq!(items[0]["function_call_id"], "tc-1"); - } - - - #[tokio::test] - async fn handle_sweep_session_flips_pending_records_to_timed_out() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; - assert_eq!(resp["swept"], json!(1)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["status"], "timed_out"); - // sweep_session no longer stamps a reason string — timed_out is - // self-describing per the Denial refactor. - assert!(rec.get("denial").is_none()); - assert!(rec.get("decision_reason").is_none()); - } - - - #[tokio::test] - async fn handle_sweep_session_ignores_legacy_reason_payload_field() { - // Old callers may still pass `reason` — approval-gate accepts the - // payload but does not persist it. Behavior is identical to a - // bare {session_id} payload. 
- let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - let resp = handle_sweep_session( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "reason": "run_stopped"}), - ) - .await; - assert_eq!(resp["swept"], json!(1)); - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["status"], "timed_out"); - assert!(rec.get("denial").is_none()); - } - - - #[tokio::test] - async fn handle_sweep_session_skips_non_pending_records() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "c1"), - transition_record( - &build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ), - ) - .await - .unwrap(); - - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({"session_id": "s1"})).await; - assert_eq!(resp["swept"], json!(0)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "c1")) - .await - .unwrap(); - assert_eq!(rec["status"], "executed"); - } - - - #[tokio::test] - async fn handle_sweep_session_returns_error_when_session_id_missing() { - let bus = InMemoryStateBus::new(); - let resp = handle_sweep_session(&bus, STATE_SCOPE, json!({})).await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], "missing_session_id"); - assert_eq!(resp["swept"], json!(0)); - } - - - #[tokio::test] - async fn handle_ack_delivered_returns_zero_when_only_one_field_is_empty() { - // mutant L677: two `||` operators in the empty-field guard. 
- let bus = InMemoryStateBus::new(); - // empty turn_id - let r1 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r1["stamped"], json!(0)); - // empty call_ids - let r2 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "t", "call_ids": []}), - ) - .await; - assert_eq!(r2["stamped"], json!(0)); - // empty session_id - let r3 = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "", "turn_id": "t", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r3["stamped"], json!(0)); - } - - - #[tokio::test] - async fn handle_ack_delivered_short_circuits_before_stamping_on_one_empty_field() { - // mutant L677 — two `||` operators. If either flips to `&&`, the - // function falls through and stamps a record even when a required - // field is empty. Seed a record so the stamping path can be - // observed. - let bus = InMemoryStateBus::new(); - let terminal = transition_record( - &build_pending_record("c", "shell::fs::write", &json!({}), 0, 60_000), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - bus.set(STATE_SCOPE, &pending_key("s", "c"), terminal) - .await - .unwrap(); - - // empty turn_id — must NOT stamp the seeded record. - let r = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "", "call_ids": ["c"]}), - ) - .await; - assert_eq!(r["stamped"], json!(0)); - let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); - assert!( - stored.get("delivered_in_turn_id").is_none(), - "must not stamp when turn_id is empty; mutant would stamp" - ); - - // empty call_ids — same property. 
- let r = handle_ack_delivered( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "turn_id": "t", "call_ids": []}), - ) - .await; - assert_eq!(r["stamped"], json!(0)); - let stored = bus.get(STATE_SCOPE, &pending_key("s", "c")).await.unwrap(); - assert!( - stored.get("delivered_in_turn_id").is_none(), - "must not stamp when call_ids is empty" - ); - } diff --git a/approval-gate/tests/integration.rs b/approval-gate/tests/integration.rs deleted file mode 100644 index 80b05835..00000000 --- a/approval-gate/tests/integration.rs +++ /dev/null @@ -1,150 +0,0 @@ -//! Engine-backed test for approval-gate. Connects to an in-process / -//! local iii engine, registers the gate, fires a `before_function_call` -//! envelope on a per-test topic, posts `approval::resolve`, and asserts -//! the trigger model behavior. -//! -//! Skips cleanly when no engine is reachable so `cargo test` stays green -//! in CI without a running engine. - -use std::time::Duration; - -use approval_gate::{register, WorkerConfig, FN_LIST_UNDELIVERED, FN_RESOLVE, STATE_SCOPE}; -use iii_sdk::{register_worker, InitOptions, TriggerRequest}; -use serde_json::json; - -const DEFAULT_ENGINE_URL: &str = "ws://127.0.0.1:49134"; -const ENGINE_PROBE_TIMEOUT_MS: u64 = 500; - -#[tokio::test] -async fn round_trip_allow_returns_pending_immediately_and_executes_on_resolve() { - let url = std::env::var("III_URL").unwrap_or_else(|_| DEFAULT_ENGINE_URL.to_string()); - let iii = register_worker(&url, InitOptions::default()); - - // Probe the engine with a short-timeout state::get; if it errors, - // assume no engine is running locally and skip cleanly. 
- let probe = iii - .trigger(TriggerRequest { - function_id: "state::get".into(), - payload: json!({ "scope": STATE_SCOPE, "key": "__probe__" }), - action: None, - timeout_ms: Some(ENGINE_PROBE_TIMEOUT_MS), - }) - .await; - if probe.is_err() { - eprintln!("skipping: no engine at {url}"); - return; - } - - // Use a unique topic per run so concurrent test runs don't collide, - // and so we don't race the production approval-gate worker if one is - // already subscribed to the default topic. - let nonce = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let topic = format!("agent::before_function_call::it_{nonce}"); - let session_id = format!("approval-it-{nonce}"); - let function_call_id = format!("tc-it-{nonce}"); - let event_id = format!("evt-it-{nonce}"); - let reply_stream = format!("rs-it-{nonce}"); - - let _refs = register( - &iii, - &WorkerConfig { - topic: topic.clone(), - default_timeout_ms: 5_000, - ..WorkerConfig::default() - }, - ) - .expect("register approval-gate"); - - let envelope = json!({ - "event_id": event_id, - "reply_stream": reply_stream, - "payload": { - "session_id": session_id, - "function_call": { - "id": function_call_id, - "function_id": "shell::filesystem::write", - "arguments": {}, - }, - "approval_required": ["shell::filesystem::write"], - } - }); - - // Drive the subscriber by directly triggering its function id. - // In the trigger model, it returns immediately with block=true + pending. - let reply = iii - .trigger(TriggerRequest { - function_id: "policy::approval_gate".into(), - payload: envelope, - action: None, - timeout_ms: Some(10_000), - }) - .await - .expect("subscriber trigger ok"); - - assert_eq!(reply["block"], true, "subscriber reply: {reply}"); - assert_eq!(reply["status"], "pending", "subscriber reply: {reply}"); - - // Wait for the gate to write the pending record before we resolve. 
- let key = format!("{session_id}/{function_call_id}"); - let mut tries = 0; - loop { - let v = iii - .trigger(TriggerRequest { - function_id: "state::get".into(), - payload: json!({ "scope": STATE_SCOPE, "key": key }), - action: None, - timeout_ms: Some(1_000), - }) - .await - .unwrap_or(json!(null)); - if v.get("status").and_then(|s| s.as_str()) == Some("pending") { - break; - } - tries += 1; - assert!(tries < 40, "pending entry never appeared (key={key})"); - tokio::time::sleep(Duration::from_millis(50)).await; - } - - // Post the allow decision. - let resolve = iii - .trigger(TriggerRequest { - function_id: FN_RESOLVE.into(), - payload: json!({ - "session_id": session_id, - "function_call_id": function_call_id, - "decision": "allow", - }), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("resolve trigger"); - assert_eq!(resolve["ok"], true, "resolve response: {resolve}"); - - // The underlying function "shell::filesystem::write" doesn't exist in - // the test engine, so the invocation will fail and the record should be - // "failed". Verify it surfaced in list_undelivered. - tokio::time::sleep(Duration::from_millis(100)).await; - let undelivered = iii - .trigger(TriggerRequest { - function_id: FN_LIST_UNDELIVERED.into(), - payload: json!({ "session_id": session_id }), - action: None, - timeout_ms: Some(5_000), - }) - .await - .expect("list_undelivered ok"); - let entries = undelivered["entries"].as_array().expect("entries array"); - let our_entry = entries - .iter() - .find(|e| e["function_call_id"] == function_call_id) - .expect("our entry in undelivered list"); - assert!( - our_entry["status"] == "failed" || our_entry["status"] == "executed", - "unexpected status: {}", - our_entry["status"] - ); -} diff --git a/approval-gate/tests/intercept.rs b/approval-gate/tests/intercept.rs deleted file mode 100644 index fa5a0c35..00000000 --- a/approval-gate/tests/intercept.rs +++ /dev/null @@ -1,218 +0,0 @@ -//! 
handle_intercept — the gate's intercept-time decision path. -//! Covers replay handling, fail-closed on state-write errors, the -//! session_id stamping, and the force_pending classifier branch. - -mod common; - -use approval_gate::*; -use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; -use serde_json::{json, Value}; -use std::sync::Mutex; - - - - #[tokio::test] - async fn handle_intercept_returns_pending_envelope_when_call_is_gated() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("pending")); - assert_eq!(reply["call_id"], json!("tc-1")); - assert_eq!(reply["function_id"], json!("shell::fs::write")); - // Pending status is self-describing — no `reason` or `denial` field - // is emitted while the call is in-flight. - assert!(reply.get("reason").is_none()); - assert!(reply.get("denial").is_none()); - } - - - #[tokio::test] - async fn handle_intercept_writes_pending_record_to_state() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let key = pending_key(&call.session_id, &call.function_call_id); - let rec = bus - .get(STATE_SCOPE, &key) - .await - .expect("pending record written"); - assert_eq!(rec["status"], "pending"); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["expires_at"], 61_000); - } - - - #[tokio::test] - async fn handle_intercept_passes_through_when_call_is_not_gated() { - let bus = InMemoryStateBus::new(); - let mut call = sample_call(); - call.approval_required = vec!["other".into()]; - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(false)); - let key = pending_key(&call.session_id, &call.function_call_id); - assert!( - 
bus.get(STATE_SCOPE, &key).await.is_none(), - "no record written" - ); - } - - - #[tokio::test] - async fn handle_intercept_force_pending_writes_when_not_on_required_list() { - let bus = InMemoryStateBus::new(); - let mut call = sample_call(); - call.approval_required = vec!["other".into()]; - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, true).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("pending")); - let key = pending_key(&call.session_id, &call.function_call_id); - assert!(bus.get(STATE_SCOPE, &key).await.is_some()); - } - - - #[tokio::test] - async fn handle_intercept_fails_closed_on_state_write_error() { - let bus = FailingStateBus; - let call = sample_call(); - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!( - reply["block"], - json!(true), - "state write failure must NOT fail-open" - ); - assert_eq!(reply["status"], json!("denied")); - assert_eq!(reply["denial"]["kind"], json!("state_error")); - assert_eq!( - reply["denial"]["detail"]["phase"], - json!("intercept_write_pending") - ); - // The underlying error message is present but its exact text is - // bus-implementation-specific; just check it's non-empty. 
- assert!( - reply["denial"]["detail"]["error"] - .as_str() - .map(|s| !s.is_empty()) - .unwrap_or(false), - "state_error detail must include error message: {reply}" - ); - assert_eq!(reply["function_id"], json!("shell::fs::write")); - } - - - #[tokio::test] - async fn handle_intercept_stamps_session_id_into_pending_record() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let rec = bus - .get( - STATE_SCOPE, - &pending_key(&call.session_id, &call.function_call_id), - ) - .await - .expect("pending record"); - assert_eq!(rec["session_id"], json!(call.session_id)); - } - - - // ── Boundary + edge-case tests prompted by cargo-mutants survivors ──── - // - // Each test corresponds to a mutant the test suite previously didn't - // catch. Test name → mutated line in src/lib.rs. - - #[tokio::test] - async fn handle_intercept_replay_of_terminal_record_returns_already_resolved() { - // mutant L331: replace `==` with `!=` in the replay defense — if - // flipped, terminal records would be overwritten with fresh pending. - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let terminal = transition_record( - &build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ), - "executed", - Some(json!({"ok": true})), - None, - None, - ); - bus.set(STATE_SCOPE, &key, terminal).await.unwrap(); - - let reply = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - assert_eq!(reply["block"], json!(true)); - assert_eq!(reply["status"], json!("executed")); - // Replay reply: status carries the prior outcome, `replay` discriminator - // says we're echoing rather than denying afresh, and no `denial` is - // synthesized (the historical record is the source of truth). 
- assert_eq!(reply["replay"], json!("already_resolved")); - assert!(reply.get("denial").is_none()); - assert!(reply.get("reason").is_none()); - - // Crucial: the stored row is still `executed`, not overwritten. - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!(stored["status"], json!("executed")); - assert_eq!(stored["result"], json!({"ok": true})); - } - - - #[tokio::test] - async fn handle_intercept_replay_of_pending_record_preserves_expires_at() { - // mutant L331: same branch, pending side. New pending must not bump - // the expires_at on the existing row. - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let pending = build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ); - bus.set(STATE_SCOPE, &key, pending.clone()).await.unwrap(); - - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!( - stored["expires_at"], pending["expires_at"], - "replay must not bump expires_at on the live row" - ); - } - - - #[tokio::test] - async fn handle_intercept_replay_of_approved_record_preserves_state() { - // mutant L331:42 — replace `==` with `!=` on the "approved" side. - // The L331:19 mutation is killed by the *_pending_* test above; - // this one requires an approved record specifically. 
- let bus = InMemoryStateBus::new(); - let call = sample_call(); - let key = pending_key(&call.session_id, &call.function_call_id); - let approved = transition_record( - &build_pending_record( - &call.function_call_id, - &call.function_id, - &call.args, - 0, - 60_000, - ), - "approved", - None, - None, - None, - ); - bus.set(STATE_SCOPE, &key, approved.clone()).await.unwrap(); - - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 999_000, 60_000, false).await; - let stored = bus.get(STATE_SCOPE, &key).await.unwrap(); - assert_eq!( - stored["status"], - json!("approved"), - "replay of approved row must keep status; mutant would overwrite with pending" - ); - } diff --git a/approval-gate/tests/lifecycle.rs b/approval-gate/tests/lifecycle.rs index 24036d24..6e603e75 100644 --- a/approval-gate/tests/lifecycle.rs +++ b/approval-gate/tests/lifecycle.rs @@ -1,271 +1,244 @@ -//! Record-lifecycle helpers: build_pending_record, transition_record, -//! maybe_flip_timed_out, collect_timed_out_for_sweep, plus the small -//! is_terminal_status / pending_key utilities. +//! End-to-end lifecycle tests for the approval-gate simplification (T17). +//! +//! Each test exercises the full intercept → resolve → consume flow against +//! the in-memory `StateBus` + `FunctionExecutor` fakes. These are the +//! integration safety net for the whole refactor — they assert that the +//! pieces snap together correctly without an iii engine. +//! +//! Four flows: +//! 1. Allow path — full Ask → Allow → Executed → consume drains it +//! 2. Deny path — full Ask → Deny(UserCorrected) → consume drains it +//! 3. Timeout path — Pending past expires_at → consume lazy-flips → drains it +//! 4. 
Cascade path — two Pending rows; allow+always; both end up consumed mod common; -use approval_gate::*; -use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; +use approval_gate::record::{Outcome, Record, Status}; +use approval_gate::rules::{Action, Rule, Ruleset}; +use approval_gate::{ + handle_consume, handle_intercept, handle_resolve, IncomingCall, StateBus, STATE_SCOPE, +}; +use common::{FakeExecutor, InMemoryStateBus}; use serde_json::{json, Value}; -use std::sync::Mutex; - - - - #[test] - fn maybe_flip_timed_out_returns_some_when_pending_and_expired() { - let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let flipped = maybe_flip_timed_out(&rec, 70_000).expect("should flip"); - assert_eq!(flipped["status"], "timed_out"); - // Timeout carries no Denial — the status alone explains the outcome. - assert!(flipped.get("denial").is_none()); - assert!(flipped.get("decision_reason").is_none()); - } - - - #[test] - fn maybe_flip_timed_out_returns_none_when_pending_and_not_expired() { - let rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - assert!(maybe_flip_timed_out(&rec, 60_000).is_none()); - assert!(maybe_flip_timed_out(&rec, 1_500).is_none()); - } - - - #[test] - fn maybe_flip_timed_out_returns_none_when_not_pending() { - let rec = json!({ - "function_call_id": "tc-1", - "status": "executed", - "expires_at": 1_000_u64, - }); - assert!(maybe_flip_timed_out(&rec, 999_999_999).is_none()); - } - - - #[test] - fn transition_record_stamps_resolved_at_for_terminal_status() { - let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record_with_now( - &base, - "executed", - Some(json!({"ok": true})), - None, - None, - 12_345, - ); - assert_eq!(rec["resolved_at"].as_u64(), Some(12_345)); - } - - - #[test] - fn transition_record_preserves_existing_resolved_at_on_relift() { - let base = 
build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let first = transition_record_with_now( - &base, - "executed", - Some(json!({"ok": true})), - None, - None, - 12_345, - ); - let second = transition_record_with_now( - &first, - "executed", - Some(json!({"ok": true})), - None, - None, - 99_999, - ); - assert_eq!(second["resolved_at"].as_u64(), Some(12_345)); - } - - - #[test] - fn transition_record_does_not_stamp_resolved_at_for_intermediate_status() { - let base = build_pending_record("c1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = - transition_record_with_now(&base, "approved", None, None, None, 12_345); - assert!(rec.get("resolved_at").is_none()); - } - - - #[test] - fn is_terminal_status_returns_true_for_terminal_states() { - assert!(is_terminal_status("executed")); - assert!(is_terminal_status("failed")); - assert!(is_terminal_status("denied")); - assert!(is_terminal_status("timed_out")); - } - - - #[test] - fn is_terminal_status_returns_false_for_in_progress_states() { - assert!(!is_terminal_status("pending")); - assert!(!is_terminal_status("approved")); - assert!(!is_terminal_status("anything_else")); - assert!(!is_terminal_status("")); - } - - - #[test] - fn pending_key_includes_session_and_tool_call_id() { - assert_eq!(pending_key("s1", "tc-1"), "s1/tc-1"); - } - - - #[test] - fn build_pending_record_sets_status_and_expiry() { - let now = 1_000_000; - let rec = build_pending_record("tc-1", "write", &json!({"x": 1}), now, 60_000); - assert_eq!(rec["status"], "pending"); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["expires_at"], 1_060_000); - } - - - #[test] - fn transition_record_to_executed_attaches_result() { - let base = build_pending_record( - "tc-1", - "shell::fs::write", - &json!({"path":"/a"}), - 1_000, - 60_000, - ); - let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); - assert_eq!(rec["status"], "executed"); - assert_eq!(rec["result"], json!({"ok": 
true})); - assert!(rec.get("error").is_none() || rec["error"].is_null()); - assert_eq!(rec["function_call_id"], "tc-1"); - assert_eq!(rec["function_id"], "shell::fs::write"); - } - - - #[test] - fn transition_record_to_failed_attaches_error() { - let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record(&base, "failed", None, Some("EACCES".into()), None); - assert_eq!(rec["status"], "failed"); - assert_eq!(rec["error"], "EACCES"); - assert!(rec.get("result").is_none() || rec["result"].is_null()); - } - - - #[test] - fn transition_record_to_denied_attaches_structured_denial() { - let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record( - &base, - "denied", - None, - None, - Some(Denial::Policy { - rule_permission: "shell::fs::write".into(), - rule_pattern: "*".into(), - }), - ); - assert_eq!(rec["status"], "denied"); - assert_eq!(rec["denial"]["kind"], "policy"); - assert_eq!(rec["denial"]["detail"]["rule_permission"], "shell::fs::write"); - assert!( - rec.get("decision_reason").is_none(), - "legacy decision_reason must not be written: {rec}" - ); - } - - - #[test] - fn transition_record_to_timed_out_carries_no_denial() { - // Timeout status is self-describing — no Denial attached. 
- let base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - let rec = transition_record(&base, "timed_out", None, None, None); - assert_eq!(rec["status"], "timed_out"); - assert!(rec.get("denial").is_none()); - assert!(rec.get("decision_reason").is_none()); - } - - - #[test] - fn transition_record_preserves_delivered_in_turn_id_when_set() { - let mut base = build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000); - base.as_object_mut().unwrap().insert( - "delivered_in_turn_id".into(), - Value::String("turn-X".into()), - ); - let rec = transition_record(&base, "executed", Some(json!({"ok": true})), None, None); - assert_eq!(rec["delivered_in_turn_id"], "turn-X"); - } - - - #[test] - fn collect_timed_out_for_sweep_returns_expired_records_with_session_id() { - let mut rec = build_pending_record("tc-1", "shell::fs::write", &json!({}), 0, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s-42")); - let pile = vec![ - rec.clone(), - build_pending_record("tc-2", "shell::fs::write", &json!({}), 0, 999_999_999), - ]; - let out = collect_timed_out_for_sweep(&pile, 70_000); - assert_eq!(out.len(), 1); - let (key, flipped, session_id, call_id) = &out[0]; - assert_eq!(key, "s-42/tc-1"); - assert_eq!(session_id, "s-42"); - assert_eq!(call_id, "tc-1"); - assert_eq!(flipped["status"], json!("timed_out")); - // Timeout carries no Denial — status is self-describing. - assert!(flipped.get("denial").is_none()); - assert!(flipped.get("decision_reason").is_none()); - } - - - #[test] - fn collect_timed_out_for_sweep_skips_records_without_session_id() { - // Legacy row (pre-session_id-stamping fix). The sweeper can't - // address the right session stream, so it must skip silently — - // lazy-flip on read will still pick it up. 
- let pile = vec![build_pending_record( - "tc-legacy", - "shell::fs::write", - &json!({}), - 0, - 60_000, - )]; - let out = collect_timed_out_for_sweep(&pile, 70_000); - assert!( - out.is_empty(), - "legacy record without session_id must not be swept" - ); - } - - - #[test] - fn collect_timed_out_for_sweep_rejects_record_missing_only_call_id() { - // mutant L423: `||` → `&&` would let one-empty records sweep. - let mut rec = build_pending_record("c1", "shell::fs::write", &json!({}), 0, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - rec.as_object_mut() - .unwrap() - .insert("function_call_id".into(), json!("")); - let out = collect_timed_out_for_sweep(&[rec], 70_000); - assert!(out.is_empty(), "empty function_call_id must skip sweep"); - } - - - #[test] - fn maybe_flip_timed_out_flips_at_exact_expires_at() { - // mutant L439: `<` → `<=` would not flip at the exact boundary. - let rec = build_pending_record("c1", "f", &json!({}), 0, 60_000); - // expires_at = 0 + 60_000 = 60_000. At now=60_000 the gate - // considers the record expired (strictly past or AT expiry). 
- assert!( - maybe_flip_timed_out(&rec, 60_000).is_some(), - "must flip at exactly expires_at" - ); - assert!( - maybe_flip_timed_out(&rec, 59_999).is_none(), - "must not flip one ms before expires_at" - ); - } +use std::sync::{Arc, RwLock}; + +fn call(session: &str, cid: &str, fn_id: &str, args: Value) -> IncomingCall { + IncomingCall { + session_id: session.into(), + function_call_id: cid.into(), + function_id: fn_id.into(), + args, + approval_required: Vec::new(), + event_id: format!("evt-{cid}"), + reply_stream: format!("rs-{cid}"), + } +} + +fn ruleset_with(rules: Vec<Rule>) -> Arc<RwLock<Ruleset>> { + Arc::new(RwLock::new(rules)) +} + +#[tokio::test] +async fn allow_path_end_to_end() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + *exec.response.lock().unwrap() = Some(Ok(json!({ "stdout": "hello\n" }))); + // Empty ruleset → verdict defaults to Ask → intercept writes Pending. + let policy_rules = ruleset_with(vec![]); + + // 1. Hook fires; gate writes Pending. + let incoming = call("sess_allow", "tc-1", "shell::exec", + json!({"command": "echo", "args": ["hello"]})); + let reply = handle_intercept( + &bus, STATE_SCOPE, &incoming, &policy_rules.read().unwrap(), + 1_000, 60_000, + ).await; + assert_eq!(reply["status"], "pending"); + + // 2. Operator resolves allow. + let payload = json!({ + "session_id": "sess_allow", + "function_call_id": "tc-1", + "decision": "allow", + }); + let resolve_reply = handle_resolve(&bus, &exec, STATE_SCOPE, &policy_rules, payload, 2_000).await; + assert_eq!(resolve_reply["ok"], true); + + // Executor must have been called exactly once with the original argv. + let calls = exec.calls.lock().unwrap(); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].0, "shell::exec"); + assert_eq!(calls[0].1["command"], "echo"); + drop(calls); + + // 3. Consume drains the Done row. 
+ let consume_reply = handle_consume( + &bus, STATE_SCOPE, + json!({ "session_id": "sess_allow" }), 3_000, + ).await; + assert_eq!(consume_reply["ok"], true); + let entries = consume_reply["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["outcome"]["kind"], "executed"); + assert_eq!(entries[0]["outcome"]["detail"]["result"]["stdout"], "hello\n"); + + // 4. Row gone from state. + let leftover = bus.list_prefix(STATE_SCOPE, "sess_allow/").await; + assert!(leftover.is_empty(), "consume must delete drained rows"); +} + +#[tokio::test] +async fn deny_path_with_user_corrected_feedback_end_to_end() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + let policy_rules = ruleset_with(vec![]); + + let incoming = call("sess_deny", "tc-2", "shell::exec", + json!({"command": "rm", "args": ["-rf", "/tmp/x"]})); + handle_intercept( + &bus, STATE_SCOPE, &incoming, &policy_rules.read().unwrap(), + 1_000, 60_000, + ).await; + + // Operator denies with a correction message. + let payload = json!({ + "session_id": "sess_deny", + "function_call_id": "tc-2", + "decision": "deny", + "denial": { + "kind": "user_corrected", + "detail": { "feedback": "wrong path, use /tmp/y" }, + }, + }); + let r = handle_resolve(&bus, &exec, STATE_SCOPE, &policy_rules, payload, 2_000).await; + assert_eq!(r["ok"], true); + // Shell must NOT have been invoked. 
+ assert_eq!(exec.calls.lock().unwrap().len(), 0); + + let consume = handle_consume( + &bus, STATE_SCOPE, + json!({ "session_id": "sess_deny" }), 3_000, + ).await; + let entries = consume["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["outcome"]["kind"], "denied"); + assert_eq!( + entries[0]["outcome"]["detail"]["denial"]["kind"], + "user_corrected", + ); + assert_eq!( + entries[0]["outcome"]["detail"]["denial"]["detail"]["feedback"], + "wrong path, use /tmp/y", + ); + + let leftover = bus.list_prefix(STATE_SCOPE, "sess_deny/").await; + assert!(leftover.is_empty()); +} + +#[tokio::test] +async fn timeout_path_lazy_flips_on_consume_end_to_end() { + let bus = InMemoryStateBus::new(); + let policy_rules = ruleset_with(vec![]); + + // Seed a Pending row with a very short timeout. + let incoming = call("sess_timeout", "tc-3", "shell::exec", + json!({"command": "ls"})); + handle_intercept( + &bus, STATE_SCOPE, &incoming, &policy_rules.read().unwrap(), + 1_000, // now_ms + 1, // timeout_ms → expires_at = 1_001 + ).await; + + // Consume at now=2_000, well past expires_at. Lazy flip + return. + let consume = handle_consume( + &bus, STATE_SCOPE, + json!({ "session_id": "sess_timeout" }), 2_000, + ).await; + let entries = consume["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["outcome"]["kind"], "timed_out"); + + let leftover = bus.list_prefix(STATE_SCOPE, "sess_timeout/").await; + assert!(leftover.is_empty()); +} + +#[tokio::test] +async fn cascade_path_end_to_end() { + let bus = InMemoryStateBus::new(); + let exec = FakeExecutor::default(); + *exec.response.lock().unwrap() = Some(Ok(json!({ "stdout": "" }))); + let policy_rules = ruleset_with(vec![]); + + // Two pending rows for the SAME argv shape — `cascade_allow_for_session` + // pushes an exact-pattern Allow rule from the first row's args; the + // second row should then verdict as Allow and auto-resolve. 
+ for cid in ["tc-4", "tc-5"] { + let incoming = call("sess_cascade", cid, "shell::exec", + json!({"command": "echo", "args": ["go"]})); + handle_intercept( + &bus, STATE_SCOPE, &incoming, &policy_rules.read().unwrap(), + 1_000, 60_000, + ).await; + } + + // Resolve tc-4 with always:true. + let payload = json!({ + "session_id": "sess_cascade", + "function_call_id": "tc-4", + "decision": "allow", + "always": true, + }); + let r = handle_resolve(&bus, &exec, STATE_SCOPE, &policy_rules, payload, 2_000).await; + assert_eq!(r["ok"], true); + assert_eq!(r["cascaded"], 1, "one extra row (tc-5) must have auto-resolved"); + + // Executor called twice (originator + cascade). + assert_eq!(exec.calls.lock().unwrap().len(), 2); + + // Both rows in state as Done(Executed). Consume drains them. + let consume = handle_consume( + &bus, STATE_SCOPE, + json!({ "session_id": "sess_cascade" }), 3_000, + ).await; + let entries = consume["entries"].as_array().unwrap(); + assert_eq!(entries.len(), 2); + for e in entries { + assert_eq!(e["outcome"]["kind"], "executed"); + } + + // Ruleset gained the runtime Allow rule with the exact pattern. + let rs = policy_rules.read().unwrap(); + let pushed = rs.last().expect("cascade must push a rule"); + assert_eq!(pushed.action, Action::Allow); + assert_eq!(pushed.permission, "shell::exec"); + assert_eq!( + pushed.pattern, "echo go", + "exact-pattern push (NOT blanket '*') — 'always allow echo go' does not grant rm -rf /", + ); + + let leftover = bus.list_prefix(STATE_SCOPE, "sess_cascade/").await; + assert!(leftover.is_empty()); +} + +#[tokio::test] +async fn allow_rule_short_circuits_with_no_state_write() { + // Bonus: not from the plan, but worth pinning. A Verdict::Allow at + // intercept time must NOT write a Pending row (no state, no consume). 
+ let bus = InMemoryStateBus::new(); + let policy_rules = ruleset_with(vec![Rule { + permission: "shell::exec".into(), + pattern: "git status*".into(), + action: Action::Allow, + }]); + + let incoming = call("sess_pass", "tc-6", "shell::exec", + json!({"command": "git", "args": ["status"]})); + let reply = handle_intercept( + &bus, STATE_SCOPE, &incoming, &policy_rules.read().unwrap(), + 1_000, 60_000, + ).await; + assert_eq!(reply["block"], false); + + let leftover = bus.list_prefix(STATE_SCOPE, "sess_pass/").await; + assert!(leftover.is_empty(), "Allow path must not touch state"); +} diff --git a/approval-gate/tests/misc.rs b/approval-gate/tests/misc.rs deleted file mode 100644 index 2cc96f69..00000000 --- a/approval-gate/tests/misc.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! Miscellaneous: function-id constants, marker-target validation, -//! and the FakeExecutor recording-of-calls smoke test. - -mod common; - -use approval_gate::*; -use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; -use serde_json::{json, Value}; -use std::sync::Mutex; - - - - #[test] - fn fn_constants_match_spec_strings() { - assert_eq!(FN_RESOLVE, "approval::resolve"); - assert_eq!(FN_LIST_PENDING, "approval::list_pending"); - assert_eq!(FN_LIST_UNDELIVERED, "approval::list_undelivered"); - assert_eq!(FN_ACK_DELIVERED, "approval::ack_delivered"); - assert_eq!(FN_LOOKUP_RECORD, "approval::lookup_record"); - } - - - #[tokio::test] - async fn fake_executor_records_calls() { - let exec = FakeExecutor::default(); - let out = exec - .invoke("shell::fs::write", json!({"x": 1}), "cid", "sid") - .await - .unwrap(); - assert_eq!(out, json!({"ok": true})); - let calls = exec.calls.lock().unwrap().clone(); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].0, "shell::fs::write"); - assert_eq!(calls[0].2, "cid"); - assert_eq!(calls[0].3, "sid"); - } - - - #[test] - fn unverified_marker_targets_lists_unasserted_rules() { - let rules = vec![ - InterceptorRule { - 
function_id: "shell::exec".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: false, - }, - InterceptorRule { - function_id: "shell::exec_bg".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }, - InterceptorRule { - function_id: "no_marker::fn".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - assert_eq!(unverified_marker_targets(&rules), vec!["shell::exec"]); - } - - - #[test] - fn unverified_marker_targets_empty_when_all_verified_or_marker_off() { - let rules = vec![ - InterceptorRule { - function_id: "shell::exec".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: true, - marker_target_verified: true, - }, - InterceptorRule { - function_id: "other".into(), - classifier: None, - classifier_timeout_ms: 2000, - inject_approval_marker: false, - marker_target_verified: false, - }, - ]; - assert!(unverified_marker_targets(&rules).is_empty()); - } diff --git a/approval-gate/tests/resolve.rs b/approval-gate/tests/resolve.rs deleted file mode 100644 index 537c44ae..00000000 --- a/approval-gate/tests/resolve.rs +++ /dev/null @@ -1,673 +0,0 @@ -//! Approval-resolve flow: handle_resolve, the cascade-on-`always` sweep, -//! and handle_lookup_record. Uses an InMemoryStateBus + FakeExecutor. 
- -mod common; - -use approval_gate::*; -use common::{empty_policy_rules, sample_call, FailingStateBus, FakeExecutor, InMemoryStateBus}; -use serde_json::{json, Value}; -use std::sync::Mutex; - - - - #[tokio::test] - async fn handle_resolve_on_expired_pending_flips_to_timed_out_and_ignores_decision() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), - 70_000, - ) - .await; - assert_eq!(resp["ok"], json!(false)); - assert_eq!(resp["error"], "timed_out"); - - assert!(exec.calls.lock().unwrap().is_empty()); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "timed_out"); - } - - - #[tokio::test] - async fn handle_lookup_record_returns_null_when_missing() { - let bus = InMemoryStateBus::new(); - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "function_call_id": "c1"}), - ) - .await; - assert!(v.is_null()); - } - - - #[tokio::test] - async fn handle_lookup_record_returns_record_when_present() { - let bus = InMemoryStateBus::new(); - let call = sample_call(); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, 1_000, 60_000, false).await; - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s1", "function_call_id": "tc-1"}), - ) - .await; - assert_eq!(v["status"], json!("pending")); - assert_eq!(v["function_id"], json!("shell::fs::write")); - } - - - #[tokio::test] - async fn handle_resolve_allow_invokes_function_and_records_executed() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record( - "tc-1", - 
"shell::fs::write", - &json!({"path":"/a"}), - 1_000, - 60_000, - ), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - let calls = exec.calls.lock().unwrap().clone(); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].0, "shell::fs::write"); - assert_eq!(calls[0].1, json!({"path":"/a"})); - assert_eq!(calls[0].2, "tc-1"); - assert_eq!(calls[0].3, "s1"); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "executed"); - assert_eq!(rec["result"], json!({"ok": true})); - } - - - #[tokio::test] - async fn allow_without_always_does_not_cascade() { - // Two pending shell::exec calls in the same session. Resolving - // the first with allow (always=false) must NOT touch the second. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for cid in ["tc-1", "tc-2"] { - let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert!( - resp.get("cascaded").is_none(), - "cascaded field must be omitted when always was not set: {resp}" - ); - let other = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-2")) - .await - .unwrap(); - assert_eq!(other["status"], "pending"); - assert_eq!(rules.read().unwrap().len(), 0, "rule must not be pushed"); - } - - - #[tokio::test] - async fn allow_with_always_pushes_rule_and_cascades_same_session_pending() { - 
// Three pending calls in session s1: two shell::exec, one - // shell::fs::write. Resolving the first shell::exec with - // always=true must: - // 1. Push an Allow rule for shell::exec - // 2. Auto-resolve the other shell::exec pending in this session - // 3. Leave the shell::fs::write pending untouched - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for (cid, fn_id) in [ - ("tc-1", "shell::exec"), - ("tc-2", "shell::exec"), - ("tc-3", "shell::fs::write"), - ] { - let mut rec = build_pending_record(cid, fn_id, &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert_eq!( - resp["cascaded"], json!(1), - "tc-2 should cascade; tc-1 originator excluded; tc-3 not matched" - ); - - // The Allow rule for shell::exec is now in the shared ruleset. - let pushed = rules.read().unwrap(); - assert_eq!(pushed.len(), 1); - assert_eq!(pushed[0].permission, "shell::exec"); - assert_eq!(pushed[0].action, rules::Action::Allow); - drop(pushed); - - // Originator and cascaded record both transitioned to executed. - let r1 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - let r2 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-2")) - .await - .unwrap(); - let r3 = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-3")) - .await - .unwrap(); - assert_eq!(r1["status"], "executed"); - assert_eq!(r2["status"], "executed"); - assert_eq!( - r3["status"], "pending", - "non-matching function_id must stay pending: {r3}" - ); - - // Executor was invoked twice: originator + cascaded. 
- assert_eq!(exec.calls.lock().unwrap().len(), 2); - } - - - #[tokio::test] - async fn cascade_does_not_cross_session_boundary() { - // tc-1 in session s1, tc-2 in session s2 — both shell::exec. - // Resolving s1/tc-1 with always must not touch s2/tc-2. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - for (session, cid) in [("s1", "tc-1"), ("s2", "tc-2")] { - let mut rec = build_pending_record(cid, "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!(session)); - bus.set(STATE_SCOPE, &pending_key(session, cid), rec) - .await - .unwrap(); - } - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - assert!( - resp.get("cascaded").is_none() || resp["cascaded"] == json!(0), - "no record in s1 to cascade onto; tc-2 in s2 must NOT be touched: {resp}" - ); - - let other_session = bus - .get(STATE_SCOPE, &pending_key("s2", "tc-2")) - .await - .unwrap(); - assert_eq!(other_session["status"], "pending"); - assert_eq!( - exec.calls.lock().unwrap().len(), - 1, - "only the originator should have been invoked" - ); - } - - - #[tokio::test] - async fn cascade_skips_originator_record() { - // Single pending record. always=true must not double-resolve it. 
- let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let mut rec = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); - rec.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) - .await - .unwrap(); - let rules = empty_policy_rules(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - // Originator counts under the existing allow path, not the cascade. - assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); - assert_eq!(exec.calls.lock().unwrap().len(), 1); - } - - - #[tokio::test] - async fn cascade_skips_already_resolved_records_in_session() { - // Two records in s1: tc-1 pending, tc-2 already terminal. The - // cascade must skip tc-2. - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let mut r1 = build_pending_record("tc-1", "shell::exec", &json!({}), 1_000, 60_000); - r1.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), r1) - .await - .unwrap(); - let mut r2 = build_pending_record("tc-2", "shell::exec", &json!({}), 1_000, 60_000); - r2.as_object_mut() - .unwrap() - .insert("session_id".into(), json!("s1")); - let r2_done = transition_record(&r2, "executed", Some(json!({"ok": true})), None, None); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-2"), r2_done) - .await - .unwrap(); - - let rules = empty_policy_rules(); - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &rules, - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "allow", - "always": true, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], true); - // tc-2 is terminal — not pending — so cascade skips it. 
- assert!(resp.get("cascaded").is_none() || resp["cascaded"] == json!(0)); - } - - - #[tokio::test] - async fn handle_resolve_deny_does_not_invoke_function() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - "denial": { - "kind": "user_corrected", - "detail": { "feedback": "not authorized" } - }, - }), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - assert!(exec.calls.lock().unwrap().is_empty()); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "denied"); - assert_eq!(rec["denial"]["kind"], "user_corrected"); - assert_eq!(rec["denial"]["detail"]["feedback"], "not authorized"); - } - - - #[tokio::test] - async fn handle_resolve_allow_records_failed_when_function_errors() { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - *exec.response.lock().unwrap() = Some(Err("EACCES".into())); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "shell::fs::write", &json!({}), 1_000, 60_000), - ) - .await - .unwrap(); - - let resp = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id":"s1","function_call_id":"tc-1","decision":"allow"}), - 1_500, - ) - .await; - assert_eq!(resp["ok"], json!(true)); - - let rec = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(rec["status"], "failed"); - assert_eq!(rec["error"], "EACCES"); - } - - - #[tokio::test] - async fn resolve_flips_status_when_pending() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - 
build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "function_call_id": "tc-1", - "session_id": "s1", - "decision": "allow", - }), - 1_500, - ) - .await; - - assert_eq!(out["ok"], true); - let stored = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(stored["status"], "executed"); - } - - - #[tokio::test] - async fn resolve_accepts_legacy_tool_call_id_field() { - let bus = InMemoryStateBus::new(); - bus.set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await - .unwrap(); - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "tool_call_id": "tc-1", - "session_id": "s1", - "decision": "allow", - }), - 1_500, - ) - .await; - - assert_eq!(out["ok"], true); - } - - - #[tokio::test] - async fn resolve_rejects_already_resolved_entry() { - let bus = InMemoryStateBus::new(); - let mut rec = build_pending_record("tc-1", "write", &json!({}), 0, 60_000); - rec["status"] = json!("allow"); - bus.set(STATE_SCOPE, &pending_key("s1", "tc-1"), rec) - .await - .unwrap(); - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"function_call_id": "tc-1", "session_id": "s1", "decision": "deny"}), - 1_500, - ) - .await; - assert_eq!(out["ok"], false); - assert_eq!(out["error"], "already_resolved"); - } - - - #[tokio::test] - async fn resolve_deny_without_denial_defaults_to_user_rejected() { - let bus = InMemoryStateBus::new(); - let _ = bus - .set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await; - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, 
- STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - }), - 1_500, - ) - .await; - assert_eq!(out["ok"], true); - - let stored = bus - .get(STATE_SCOPE, &pending_key("s1", "tc-1")) - .await - .unwrap(); - assert_eq!(stored["status"], "denied"); - assert_eq!(stored["denial"]["kind"], "user_rejected"); - } - - - #[tokio::test] - async fn resolve_deny_rejects_malformed_denial() { - let bus = InMemoryStateBus::new(); - let _ = bus - .set( - STATE_SCOPE, - &pending_key("s1", "tc-1"), - build_pending_record("tc-1", "write", &json!({}), 0, 60_000), - ) - .await; - - let exec = FakeExecutor::default(); - let out = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": "s1", - "function_call_id": "tc-1", - "decision": "deny", - "denial": { "kind": "not_a_real_kind" }, - }), - 1_500, - ) - .await; - assert_eq!(out["ok"], false); - assert_eq!(out["error"], "bad_denial"); - } - - - #[tokio::test] - async fn handle_lookup_record_rejects_when_only_one_id_is_empty() { - // mutant L395: `||` → `&&` would let one-empty slip through. - let bus = InMemoryStateBus::new(); - let v1 = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "", "function_call_id": "c"}), - ) - .await; - assert!(v1.is_null()); - let v2 = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "s", "function_call_id": ""}), - ) - .await; - assert!(v2.is_null()); - } - - - #[tokio::test] - async fn handle_resolve_rejects_when_only_one_id_is_empty() { - // mutant L489: same `||` pattern in handle_resolve guard. 
- let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let r1 = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id": "", "function_call_id": "c", "decision": "allow"}), - 0, - ) - .await; - assert_eq!(r1["error"], json!("missing_id")); - let r2 = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({"session_id": "s", "function_call_id": "", "decision": "allow"}), - 0, - ) - .await; - assert_eq!(r2["error"], json!("missing_id")); - } - - - #[tokio::test] - async fn handle_lookup_record_short_circuits_before_bus_get_on_one_empty_id() { - // mutant L395 — `||` → `&&` would let one-empty slip into bus.get. - // Seed a record at the address the mutant would compute (pending_key("", "c") = "/c"), - // so the mutant returns the seeded row while original code stays at Null. - let bus = InMemoryStateBus::new(); - bus.set(STATE_SCOPE, "/c", json!({"sentinel": "should_not_leak"})) - .await - .unwrap(); - let v = handle_lookup_record( - &bus, - STATE_SCOPE, - json!({"session_id": "", "function_call_id": "c"}), - ) - .await; - assert!( - v.is_null(), - "must short-circuit; the seeded sentinel must not leak through" - ); - } diff --git a/approval-gate/tests/state_machine.rs b/approval-gate/tests/state_machine.rs deleted file mode 100644 index d42d2d57..00000000 --- a/approval-gate/tests/state_machine.rs +++ /dev/null @@ -1,209 +0,0 @@ -//! Property-based state-machine invariants. Drives the gate through -//! random sequences of (intercept, resolve, sweep, ack, ...) ops and -//! asserts the four invariants documented in the test body. 
- - -mod common; - -use approval_gate::*; -use common::{empty_policy_rules, FakeExecutor, InMemoryStateBus}; -use proptest::prelude::*; -use serde_json::{json, Value}; - - - -#[derive(Debug, Clone)] -enum Op { - InterceptRequired, - InterceptNotRequired, - ResolveAllow, - ResolveDeny, - AdvanceClockAndLazyFlip, - SweepSession, - AckDelivered, -} - -fn arb_op() -> impl Strategy { - prop_oneof![ - Just(Op::InterceptRequired), - Just(Op::InterceptNotRequired), - Just(Op::ResolveAllow), - Just(Op::ResolveDeny), - Just(Op::AdvanceClockAndLazyFlip), - Just(Op::SweepSession), - Just(Op::AckDelivered), - ] -} - -fn make_call(approval_required_self: bool) -> IncomingCall { - IncomingCall { - session_id: "s".into(), - function_call_id: "c".into(), - function_id: "test::write".into(), - args: json!({}), - approval_required: if approval_required_self { - vec!["test::write".into()] - } else { - vec!["other::fn".into()] - }, - event_id: "e".into(), - reply_stream: "r".into(), - } -} - - - - proptest! { - #![proptest_config(ProptestConfig { - cases: 256, - .. 
ProptestConfig::default() - })] - - #[test] - fn state_machine_invariants(ops in prop::collection::vec(arb_op(), 1..30)) { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("tokio runtime"); - - rt.block_on(async { - let bus = InMemoryStateBus::new(); - let exec = FakeExecutor::default(); - let session_id = "s"; - let call_id = "c"; - let timeout_ms: u64 = 60_000; - let mut now_ms: u64 = 1_000; - - let mut ever_terminal = false; - let mut last_delivered: Option = None; - - for op in &ops { - match op { - Op::InterceptRequired => { - let call = make_call(true); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; - } - Op::InterceptNotRequired => { - let call = make_call(false); - let _ = handle_intercept(&bus, STATE_SCOPE, &call, now_ms, timeout_ms, false).await; - } - Op::ResolveAllow => { - let _ = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": session_id, - "function_call_id": call_id, - "decision": "allow", - }), - now_ms, - ) - .await; - } - Op::ResolveDeny => { - let _ = handle_resolve( - &bus, - &exec, - STATE_SCOPE, - &empty_policy_rules(), - json!({ - "session_id": session_id, - "function_call_id": call_id, - "decision": "deny", - }), - now_ms, - ) - .await; - } - Op::AdvanceClockAndLazyFlip => { - now_ms = now_ms.saturating_add(timeout_ms + 1); - let _ = handle_list_undelivered( - &bus, STATE_SCOPE, - json!({ "session_id": session_id }), - now_ms, - ).await; - } - Op::SweepSession => { - let _ = handle_sweep_session( - &bus, STATE_SCOPE, - json!({ "session_id": session_id }), - ).await; - } - Op::AckDelivered => { - let _ = handle_ack_delivered( - &bus, STATE_SCOPE, - json!({ - "session_id": session_id, - "turn_id": format!("turn-{now_ms}"), - "call_ids": [call_id], - }), - ).await; - } - } - - // Assert invariants on whatever the record currently is. 
- let key = pending_key(session_id, call_id); - let Some(rec) = bus.get(STATE_SCOPE, &key).await else { - // No record yet (e.g. only InterceptNotRequired so far). Skip. - continue; - }; - - // I1: legal status - let status = rec.get("status").and_then(Value::as_str).unwrap_or(""); - assert!( - matches!( - status, - "pending" | "approved" | "executed" | "failed" | "denied" | "timed_out" - ), - "I1 violated: illegal status {status:?} after ops {ops:?}; record={rec:?}" - ); - - // I2: no reverting terminal → pending - if matches!(status, "executed" | "failed" | "denied" | "timed_out") { - ever_terminal = true; - } - if ever_terminal { - assert!( - status != "pending", - "I2 violated: reverted to pending after terminal; ops={ops:?}; record={rec:?}" - ); - } - - // I3: pending records always have expires_at: u64 - if status == "pending" { - let exp = rec.get("expires_at").and_then(Value::as_u64); - assert!( - exp.is_some(), - "I3 violated: pending record missing expires_at; ops={ops:?}; record={rec:?}" - ); - } - - // I4: delivered_in_turn_id is monotonic — once set non-null, never unset / never replaced - let cur_delivered = rec - .get("delivered_in_turn_id") - .and_then(Value::as_str) - .map(str::to_string); - if let Some(prev) = &last_delivered { - match &cur_delivered { - Some(cur) => { - assert_eq!( - cur, prev, - "I4 violated: delivered_in_turn_id replaced {prev:?} → {cur:?}; ops={ops:?}" - ); - } - None => { - panic!( - "I4 violated: delivered_in_turn_id unset after being {prev:?}; ops={ops:?}; record={rec:?}" - ); - } - } - } - if cur_delivered.is_some() { - last_delivered = cur_delivered; - } - } - }); - } - } From 51e0c7558426eb7b2772d72f968e9288f627add8 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sat, 16 May 2026 09:22:09 -0300 Subject: [PATCH 28/30] feat(harness/web): Allow+Always, Deny feedback, expires_at countdown (T15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ApprovalRow.tsx grows three real-feature 
additions matching the approval-gate refactor's new wire surface: 1. Allow+Always button → sends {decision:'allow', always:true}. The gate cascades by pushing a runtime Allow rule with the originator's exact pattern and sweeping the session's other Pending rows (auto-resolving any that match). The response carries cascaded:N when the sweep was non-empty; not surfaced in the UI yet. 2. Optional Deny feedback textarea → when the operator types a correction message before clicking deny, the payload becomes {decision:'deny', denial:{kind:'user_corrected', detail:{feedback}}}. That feedback flows verbatim to the LLM via the stitched message on the next consume — the high-value path because the model gets actionable correction, not just 'denied'. 3. Client-side expires_at countdown → the modal shows M:SS remaining and disables actions on expiry. No server-emitted timed_out frame anymore (the gate's sweeper is gone); the LLM learns of timeout via the next-turn approval::consume that lazy-flips the row. Each pending row is split into its own ApprovalCard component so the countdown hook respects rules-of-hooks (can't sit inside a .map callback). ResolvePayload is constructed inline so the bridge's Record type accepts it without an explicit cast at the interface boundary. CSS classes for the new states (approval-allow-always, approval-countdown, approval-feedback, approval-expired-note) inherit default styling for now; a follow-up styling pass can polish them.
--- harness/web/src/components/ApprovalRow.tsx | 168 +++++++++++++++++---- 1 file changed, 135 insertions(+), 33 deletions(-) diff --git a/harness/web/src/components/ApprovalRow.tsx b/harness/web/src/components/ApprovalRow.tsx index 25ef1b99..be82413b 100644 --- a/harness/web/src/components/ApprovalRow.tsx +++ b/harness/web/src/components/ApprovalRow.tsx @@ -1,4 +1,4 @@ -import { useState } from "react"; +import { useEffect, useState } from "react"; import { bridge, BridgeError } from "../bridge"; import type { PendingApproval } from "../types"; @@ -7,22 +7,140 @@ interface Props { pending: PendingApproval[]; } +type ResolveDecision = "allow" | "deny"; +type DenialPayload = + | { kind: "user_rejected" } + | { kind: "user_corrected"; detail: { feedback: string } }; + +/** + * Subscribe to a 1s tick and report `expiresAt - now` (ms). Returns the + * raw remaining number so the parent can render its own format. Negative + * once expired; the parent disables actions on `remaining <= 0`. + */ +function useCountdown(expiresAt: number | undefined): number { + const [now, setNow] = useState(() => Date.now()); + useEffect(() => { + if (!expiresAt) return; + const t = setInterval(() => setNow(Date.now()), 1000); + return () => clearInterval(t); + }, [expiresAt]); + if (!expiresAt) return Number.POSITIVE_INFINITY; + return expiresAt - now; +} + +function formatRemaining(ms: number): string { + if (!Number.isFinite(ms)) return ""; + if (ms <= 0) return "expired"; + const s = Math.floor(ms / 1000); + const m = Math.floor(s / 60); + const r = s % 60; + return `${m}:${String(r).padStart(2, "0")}`; +} + +interface ApprovalCardProps { + sessionId: string; + approval: PendingApproval; + callId: string; + fnId: string; + busyId: string | null; + onResolve: (functionCallId: string, decision: ResolveDecision, opts?: { always?: boolean; denial?: DenialPayload }) => void; +} + +/** + * One pending-approval card. 
Lives in its own component so the + * countdown hook is rendered once per row (hooks can't sit inside + * a .map callback without violating the rules-of-hooks contract). + */ +function ApprovalCard({ sessionId: _sessionId, approval, callId, fnId, busyId, onResolve }: ApprovalCardProps) { + const remaining = useCountdown(approval.expires_at); + const expired = remaining <= 0; + const [feedback, setFeedback] = useState(""); + + const denyClick = () => { + const trimmed = feedback.trim(); + if (trimmed.length > 0) { + onResolve(callId, "deny", { + denial: { kind: "user_corrected", detail: { feedback: trimmed } }, + }); + } else { + onResolve(callId, "deny"); + } + }; + + return ( +

+
+ approval needed + {fnId} + {approval.expires_at ? ( + {formatRemaining(remaining)} + ) : null} +
+
{JSON.stringify(approval.args, null, 2)}
+
+ add correction (optional, sent to the model on deny) +