Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
results now also carry separate ARMH/shared-memo and provider prompt-cache
telemetry counters, with mock aggregation tests, so #2671 can progress
without wiring live RLM calls or billing-affecting provider behavior yet.
without wiring live RLM calls or billing-affecting provider behavior yet. The
Starlark and typed-IR gates now also reject unknown leaf dependencies,
reducer inputs, and teacher-review candidates before mock execution or replay,
keeping generated workflows fail-closed while runtime/worktree semantics stay
deferred.
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
workflow, branch, leaf, control-node, and teacher-candidate runs. The
Expand Down
6 changes: 5 additions & 1 deletion crates/tui/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
results now also carry separate ARMH/shared-memo and provider prompt-cache
telemetry counters, with mock aggregation tests, so #2671 can progress
without wiring live RLM calls or billing-affecting provider behavior yet.
without wiring live RLM calls or billing-affecting provider behavior yet. The
Starlark and typed-IR gates now also reject unknown leaf dependencies,
reducer inputs, and teacher-review candidates before mock execution or replay,
keeping generated workflows fail-closed while runtime/worktree semantics stay
deferred.
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
workflow, branch, leaf, control-node, and teacher-candidate runs. The
Expand Down
155 changes: 154 additions & 1 deletion crates/whaleflow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ impl MockWorkflowExecutor {
if let Some(max_children) = spec.max_children {
nodes.truncate(max_children);
}
validate_workflow_nodes(&nodes)?;
validate_workflow_node_shapes(&nodes)?;
self.execute_nodes(&nodes, execution)?;
execution.control_node_results.push(ControlNodeResult {
node_id: spec.id.clone(),
Expand Down Expand Up @@ -973,6 +973,12 @@ pub enum WorkflowExecutionError {
EmptyLeafPrompt { leaf: String },
#[error("duplicate workflow node `{node}`")]
DuplicateNodeId { node: String },
#[error("workflow node `{node}` has unknown {field} reference `{reference}`")]
UnknownNodeReference {
node: String,
field: &'static str,
reference: String,
},
}

fn default_frontier_limit() -> usize {
Expand All @@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String {

pub(crate) fn validate_workflow_nodes(
nodes: &[WorkflowNode],
) -> Result<(), WorkflowExecutionError> {
let mut seen = BTreeSet::new();
validate_workflow_nodes_inner(nodes, &mut seen)?;
validate_workflow_references(nodes, &seen)
}

pub(crate) fn validate_workflow_node_shapes(
nodes: &[WorkflowNode],
) -> Result<(), WorkflowExecutionError> {
let mut seen = BTreeSet::new();
validate_workflow_nodes_inner(nodes, &mut seen)
Expand Down Expand Up @@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner(
Ok(())
}

fn validate_workflow_references(
nodes: &[WorkflowNode],
known_ids: &BTreeSet<String>,
) -> Result<(), WorkflowExecutionError> {
for node in nodes {
match node {
WorkflowNode::BranchSet(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Leaf(spec) => {
validate_known_references(
spec.id.as_str(),
"depends_on_results",
&spec.depends_on_results,
known_ids,
)?;
}
WorkflowNode::Sequence(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Reduce(spec) => {
validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?;
}
WorkflowNode::TeacherReview(spec) => {
validate_known_references(
spec.id.as_str(),
"candidates",
&spec.candidates,
known_ids,
)?;
}
WorkflowNode::LoopUntil(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Cond(spec) => {
validate_workflow_references(&spec.then_nodes, known_ids)?;
validate_workflow_references(&spec.else_nodes, known_ids)?;
}
WorkflowNode::Expand(_) => {}
}
}
Ok(())
}

fn validate_known_references(
node: &str,
field: &'static str,
references: &[String],
known_ids: &BTreeSet<String>,
) -> Result<(), WorkflowExecutionError> {
for reference in references {
if !known_ids.contains(reference) {
return Err(WorkflowExecutionError::UnknownNodeReference {
node: node.to_string(),
field,
reference: reference.clone(),
});
}
}
Ok(())
}

fn control_kind_name(node: &WorkflowNode) -> &'static str {
match node {
WorkflowNode::BranchSet(_) => "branch_set",
Expand Down Expand Up @@ -2074,6 +2150,83 @@ mod tests {
);
}

#[test]
fn workflow_spec_rejects_unknown_leaf_dependency() {
let mut summarize = leaf_node("summarize");
let WorkflowNode::Leaf(spec) = &mut summarize else {
panic!("expected leaf");
};
spec.depends_on_results = vec!["missing-scan".to_string()];
let workflow = workflow_spec(vec![summarize]);

let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown leaf dependency should fail before execution");

assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "summarize".to_string(),
field: "depends_on_results",
reference: "missing-scan".to_string(),
}
);
}

#[test]
fn workflow_spec_rejects_unknown_reduce_input() {
let workflow = workflow_spec(vec![
leaf_node("scan"),
WorkflowNode::Reduce(ReduceSpec {
id: "summarize".to_string(),
inputs: vec!["scan".to_string(), "missing-review".to_string()],
prompt: "Summarize safe fixes".to_string(),
model_policy: ModelPolicy::default(),
}),
]);

let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown reduce input should fail before execution");

assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "summarize".to_string(),
field: "inputs",
reference: "missing-review".to_string(),
}
);
}

#[test]
fn workflow_spec_rejects_unknown_teacher_candidate() {
let workflow = workflow_spec(vec![
leaf_node("candidate-a"),
WorkflowNode::TeacherReview(TeacherReviewSpec {
id: "teacher-review".to_string(),
candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()],
promotion_policy: PromotionPolicy::default(),
}),
]);

let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown teacher candidate should fail before execution");

assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "teacher-review".to_string(),
field: "candidates",
reference: "candidate-b".to_string(),
}
);
}

#[test]
fn tournament_selects_passing_minimal_branch() {
let tournament = BranchTournament { min_score: 60 };
Expand Down
4 changes: 2 additions & 2 deletions crates/whaleflow/src/replay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::{
BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult,
LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError,
WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage,
validate_workflow_nodes,
validate_workflow_node_shapes, validate_workflow_nodes,
};

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
Expand Down Expand Up @@ -316,7 +316,7 @@ impl WorkflowReplayExecutor {
.as_ref()
.map(|record| record.generated_nodes.clone())
.unwrap_or_default();
validate_workflow_nodes(&generated_nodes)?;
validate_workflow_node_shapes(&generated_nodes)?;
self.execute_nodes(spec, &generated_nodes, execution)?;
let selected = record
.as_ref()
Expand Down
28 changes: 25 additions & 3 deletions crates/whaleflow/src/starlark_authoring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use thiserror::Error;
use crate::{
AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy,
PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec,
WorkflowNode, WorkflowSpec,
WorkflowNode, WorkflowSpec, validate_workflow_nodes,
};

pub type StarlarkWorkflowResult<T> = std::result::Result<T, StarlarkWorkflowError>;
Expand Down Expand Up @@ -50,10 +50,13 @@ pub fn compile_starlark_workflow(
eval.eval_module(ast, &globals)
.map_err(StarlarkWorkflowError::Starlark)?;
}
builder
let workflow = builder
.into_inner()
.workflow
.ok_or(StarlarkWorkflowError::MissingWorkflow)
.ok_or(StarlarkWorkflowError::MissingWorkflow)?;
validate_workflow_nodes(&workflow.nodes)
.map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?;
Ok(workflow)
}

pub fn compile_starlark_workflow_with_repair(
Expand Down Expand Up @@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = [])
));
}

#[test]
fn starlark_compile_gate_rejects_unknown_references() {
let source = r#"
workflow(
id = "bad-reference",
goal = "reject missing candidates",
nodes = [
teacher_review(id = "review", candidates = ["missing-candidate"]),
],
)
"#;

let err = compile_starlark_workflow("bad-reference.star", source)
.expect_err("unknown candidate should fail at the compile gate");

assert!(matches!(err, StarlarkWorkflowError::InvalidNode(_)));
assert!(err.to_string().contains("missing-candidate"));
}

#[test]
fn issue_fix_tournament_example_compiles() {
let source = include_str!("../../../workflows/issue_fix_tournament.star");
Expand Down
Loading