From aaa5583733b0ae0b7b623021e2e6d74e56c22e82 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Fri, 5 Jun 2026 22:57:39 -0700 Subject: [PATCH] fix(whaleflow): reject unknown workflow references --- CHANGELOG.md | 6 +- crates/tui/CHANGELOG.md | 6 +- crates/whaleflow/src/lib.rs | 155 ++++++++++++++++++++- crates/whaleflow/src/replay.rs | 4 +- crates/whaleflow/src/starlark_authoring.rs | 28 +++- 5 files changed, 191 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03aa4f0b8..af852cf7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow results now also carry separate ARMH/shared-memo and provider prompt-cache telemetry counters, with mock aggregation tests, so #2671 can progress - without wiring live RLM calls or billing-affecting provider behavior yet. + without wiring live RLM calls or billing-affecting provider behavior yet. The + Starlark and typed-IR gates now also reject unknown leaf dependencies, + reducer inputs, and teacher-review candidates before mock execution or replay, + keeping generated workflows fail-closed while runtime/worktree semantics stay + deferred. Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 03aa4f0b8..af852cf7c 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow results now also carry separate ARMH/shared-memo and provider prompt-cache telemetry counters, with mock aggregation tests, so #2671 can progress - without wiring live RLM calls or billing-affecting provider behavior yet. + without wiring live RLM calls or billing-affecting provider behavior yet. The + Starlark and typed-IR gates now also reject unknown leaf dependencies, + reducer inputs, and teacher-review candidates before mock execution or replay, + keeping generated workflows fail-closed while runtime/worktree semantics stay + deferred. Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The diff --git a/crates/whaleflow/src/lib.rs b/crates/whaleflow/src/lib.rs index 02a865b6d..68ef4ac24 100644 --- a/crates/whaleflow/src/lib.rs +++ b/crates/whaleflow/src/lib.rs @@ -878,7 +878,7 @@ impl MockWorkflowExecutor { if let Some(max_children) = spec.max_children { nodes.truncate(max_children); } - validate_workflow_nodes(&nodes)?; + validate_workflow_node_shapes(&nodes)?; self.execute_nodes(&nodes, execution)?; execution.control_node_results.push(ControlNodeResult { node_id: spec.id.clone(), @@ -973,6 +973,12 @@ pub enum WorkflowExecutionError { EmptyLeafPrompt { leaf: String }, #[error("duplicate workflow node `{node}`")] DuplicateNodeId { node: String }, + #[error("workflow node `{node}` has unknown {field} reference `{reference}`")] + UnknownNodeReference { + node: String, + field: &'static str, + reference: String, + }, } fn default_frontier_limit() -> usize { @@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String { pub(crate) fn validate_workflow_nodes( nodes: &[WorkflowNode], +) -> Result<(), WorkflowExecutionError> { + let mut seen = BTreeSet::new(); + validate_workflow_nodes_inner(nodes, &mut seen)?; + validate_workflow_references(nodes, &seen) +} + +pub(crate) fn validate_workflow_node_shapes( + nodes: &[WorkflowNode], ) -> Result<(), WorkflowExecutionError> { let mut seen = BTreeSet::new(); validate_workflow_nodes_inner(nodes, &mut seen) @@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner( Ok(()) } +fn validate_workflow_references( + nodes: &[WorkflowNode], + known_ids: &BTreeSet, +) -> Result<(), WorkflowExecutionError> { + for node in nodes { + match node { + WorkflowNode::BranchSet(spec) => { + validate_workflow_references(&spec.children, known_ids)?; + } + WorkflowNode::Leaf(spec) => { + validate_known_references( + spec.id.as_str(), + "depends_on_results", + &spec.depends_on_results, + known_ids, + )?; + } + WorkflowNode::Sequence(spec) => { + validate_workflow_references(&spec.children, known_ids)?; + } + WorkflowNode::Reduce(spec) => { + validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?; + } + WorkflowNode::TeacherReview(spec) => { + validate_known_references( + spec.id.as_str(), + "candidates", + &spec.candidates, + known_ids, + )?; + } + WorkflowNode::LoopUntil(spec) => { + validate_workflow_references(&spec.children, known_ids)?; + } + WorkflowNode::Cond(spec) => { + validate_workflow_references(&spec.then_nodes, known_ids)?; + validate_workflow_references(&spec.else_nodes, known_ids)?; + } + WorkflowNode::Expand(_) => {} + } + } + Ok(()) +} + +fn validate_known_references( + node: &str, + field: &'static str, + references: &[String], + known_ids: &BTreeSet, +) -> Result<(), WorkflowExecutionError> { + for reference in references { + if !known_ids.contains(reference) { + return Err(WorkflowExecutionError::UnknownNodeReference { + node: node.to_string(), + field, + reference: reference.clone(), + }); + } + } + Ok(()) +} + fn control_kind_name(node: &WorkflowNode) -> &'static str { match node { WorkflowNode::BranchSet(_) => "branch_set", @@ -2074,6 +2150,83 @@ mod tests { ); } + #[test] + fn workflow_spec_rejects_unknown_leaf_dependency() { + let mut summarize = leaf_node("summarize"); + let WorkflowNode::Leaf(spec) = &mut summarize else { + panic!("expected leaf"); + }; + spec.depends_on_results = vec!["missing-scan".to_string()]; + let workflow = workflow_spec(vec![summarize]); + + let mut executor = MockWorkflowExecutor::new(); + let err = executor + .run(&workflow) + .expect_err("unknown leaf dependency should fail before execution"); + + assert_eq!( + err, + WorkflowExecutionError::UnknownNodeReference { + node: "summarize".to_string(), + field: "depends_on_results", + reference: "missing-scan".to_string(), + } + ); + } + + #[test] + fn workflow_spec_rejects_unknown_reduce_input() { + let workflow = workflow_spec(vec![ + leaf_node("scan"), + WorkflowNode::Reduce(ReduceSpec { + id: "summarize".to_string(), + inputs: vec!["scan".to_string(), "missing-review".to_string()], + prompt: "Summarize safe fixes".to_string(), + model_policy: ModelPolicy::default(), + }), + ]); + + let mut executor = MockWorkflowExecutor::new(); + let err = executor + .run(&workflow) + .expect_err("unknown reduce input should fail before execution"); + + assert_eq!( + err, + WorkflowExecutionError::UnknownNodeReference { + node: "summarize".to_string(), + field: "inputs", + reference: "missing-review".to_string(), + } + ); + } + + #[test] + fn workflow_spec_rejects_unknown_teacher_candidate() { + let workflow = workflow_spec(vec![ + leaf_node("candidate-a"), + WorkflowNode::TeacherReview(TeacherReviewSpec { + id: "teacher-review".to_string(), + candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()], + promotion_policy: PromotionPolicy::default(), + }), + ]); + + let mut executor = MockWorkflowExecutor::new(); + let err = executor + .run(&workflow) + .expect_err("unknown teacher candidate should fail before execution"); + + assert_eq!( + err, + WorkflowExecutionError::UnknownNodeReference { + node: "teacher-review".to_string(), + field: "candidates", + reference: "candidate-b".to_string(), + } + ); + } + #[test] fn tournament_selects_passing_minimal_branch() { let tournament = BranchTournament { min_score: 60 }; diff --git a/crates/whaleflow/src/replay.rs b/crates/whaleflow/src/replay.rs index 6eeaea5c2..1cb8d4b15 100644 --- a/crates/whaleflow/src/replay.rs +++ b/crates/whaleflow/src/replay.rs @@ -8,7 +8,7 @@ use crate::{ BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult, LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError, WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage, - validate_workflow_nodes, + validate_workflow_node_shapes, validate_workflow_nodes, }; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] @@ -316,7 +316,7 @@ impl WorkflowReplayExecutor { .as_ref() .map(|record| record.generated_nodes.clone()) .unwrap_or_default(); - validate_workflow_nodes(&generated_nodes)?; + validate_workflow_node_shapes(&generated_nodes)?; self.execute_nodes(spec, &generated_nodes, execution)?; let selected = record .as_ref() diff --git a/crates/whaleflow/src/starlark_authoring.rs b/crates/whaleflow/src/starlark_authoring.rs index 2813ac41a..d930ac557 100644 --- a/crates/whaleflow/src/starlark_authoring.rs +++ b/crates/whaleflow/src/starlark_authoring.rs @@ -13,7 +13,7 @@ use thiserror::Error; use crate::{ AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy, PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec, - WorkflowNode, WorkflowSpec, + WorkflowNode, WorkflowSpec, validate_workflow_nodes, }; pub type StarlarkWorkflowResult = std::result::Result; @@ -50,10 +50,13 @@ pub fn compile_starlark_workflow( eval.eval_module(ast, &globals) .map_err(StarlarkWorkflowError::Starlark)?; } - builder + let workflow = builder .into_inner() .workflow - .ok_or(StarlarkWorkflowError::MissingWorkflow) + .ok_or(StarlarkWorkflowError::MissingWorkflow)?; + validate_workflow_nodes(&workflow.nodes) + .map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?; + Ok(workflow) } pub fn compile_starlark_workflow_with_repair( @@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = []) )); } + #[test] + fn starlark_compile_gate_rejects_unknown_references() { + let source = r#" +workflow( + id = "bad-reference", + goal = "reject missing candidates", + nodes = [ + teacher_review(id = "review", candidates = ["missing-candidate"]), + ], +) +"#; + + let err = compile_starlark_workflow("bad-reference.star", source) + .expect_err("unknown candidate should fail at the compile gate"); + + assert!(matches!(err, StarlarkWorkflowError::InvalidNode(_))); + assert!(err.to_string().contains("missing-candidate")); + } + #[test] fn issue_fix_tournament_example_compiles() { let source = include_str!("../../../workflows/issue_fix_tournament.star");