From a9d592d86d04672337c5efee146c96943de0d6ee Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Mon, 20 Apr 2026 17:00:59 +0530 Subject: [PATCH 1/3] fix(cache): cache all system messages and stabilize task tool descriptions The main changes in this PR: 1. **Anthropic cache strategy fix**: Previously only the first system message was cached, now all system messages are cached. This ensures the static system prefix remains reusable. The logic also falls back to caching the first conversation message when there's no system prompt. 2. **Stabilize task tool descriptions**: Added sorting of agents by ID before generating task tool descriptions to ensure deterministic output regardless of agent order. 3. **Stabilize skill loading**: Added sorting of skills by name (and resources by path) to ensure deterministic skill loading order. Key implementation details: - Modified `SetCache` transformer to cache all system messages instead of just the first - Added `agents.sort_by(|left, right| left.id.as_str().cmp(right.id.as_str()))` in tool_registry.rs - Added `sort_skills()` and `sort_paths()` functions in skill.rs with comprehensive tests - Updated tests to verify stable ordering behavior Breaking changes: None Testing: - Updated existing tests in set_cache.rs to reflect new caching behavior - Added `test_task_tool_description_is_stable_across_agent_order` test - Added `test_sort_skills_orders_names_and_resources` test - Updated snapshot tests for tool descriptions --- .../src/dto/anthropic/transforms/set_cache.rs | 79 ++++++++------- ...istry__all_rendered_tool_descriptions.snap | 4 +- crates/forge_app/src/tool_registry.rs | 37 +++++++ crates/forge_repo/src/skill.rs | 97 +++++++++++++++---- 4 files changed, 161 insertions(+), 56 deletions(-) diff --git a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs index 7380824cb4..8bcccbb111 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs @@ -2,20 +2,20 @@ use forge_domain::Transformer; use crate::dto::anthropic::Request; -/// Transformer that implements a simple two-breakpoint cache strategy: -/// - Always caches the first message in the conversation -/// - Always caches the last message in the conversation -/// - Removes cache control from the second-to-last message +/// Transformer that keeps Anthropic prompt-cache markers stable: +/// - Always caches every system message so the static system prefix remains reusable +/// - Falls back to caching the first conversation message when there is no system +/// prompt so single-turn requests still establish a reusable prefix +/// - Uses exactly one rolling message-level marker on the newest message pub struct SetCache; impl Transformer for SetCache { type Value = Request; - /// Implements a simple two-breakpoint cache strategy: - /// 1. Cache the first system message as it should be static. - /// 2. Cache the last message (index messages.len() - 1) - /// 3. Remove cache control from second-to-last message (index - /// messages.len() - 2) + /// Applies the default Anthropic cache strategy: + /// 1. Cache every system message when present, otherwise cache the first + /// conversation message. + /// 2. Cache only the last message as the rolling message-level marker. fn transform(&mut self, mut request: Self::Value) -> Self::Value { let len = request.get_messages().len(); let sys_len = request.system.as_ref().map_or(0, |msgs| msgs.len()); @@ -24,21 +24,27 @@ impl Transformer for SetCache { return request; } - // Cache the very first system message, ideally you should keep static content - // in it. - if let Some(system_messages) = request.system.as_mut() - && let Some(first_message) = system_messages.first_mut() - { - *first_message = std::mem::take(first_message).cached(true); - } else { - // If no system messages, we can still cache the first message in the - // conversation. + let has_system_prompt = request + .system + .as_ref() + .is_some_and(|messages| !messages.is_empty()); + + if let Some(system_messages) = request.system.as_mut() { + for message in system_messages.iter_mut() { + *message = std::mem::take(message).cached(true); + } + } + + for message in request.get_messages_mut().iter_mut() { + *message = std::mem::take(message).cached(false); + } + + if !has_system_prompt && len > 0 { if let Some(first_message) = request.get_messages_mut().first_mut() { *first_message = std::mem::take(first_message).cached(true); } } - // Add cache control to last message (if different from first) if let Some(message) = request.get_messages_mut().last_mut() { *message = std::mem::take(message).cached(true); } @@ -161,28 +167,28 @@ mod tests { } #[test] - fn test_three_messages_only_last_cached() { + fn test_three_messages_cache_first_and_last_only() { let actual = create_test_context("uau"); let expected = "[ua[u"; assert_eq!(actual, expected); } #[test] - fn test_four_messages_only_last_cached() { + fn test_four_messages_cache_first_and_last_only() { let actual = create_test_context("uaua"); let expected = "[uau[a"; assert_eq!(actual, expected); } #[test] - fn test_five_messages_only_last_cached() { + fn test_five_messages_cache_first_and_last_only() { let actual = create_test_context("uauau"); let expected = "[uaua[u"; assert_eq!(actual, expected); } #[test] - fn test_longer_conversation() { + fn test_longer_conversation_caches_first_and_last_only() { let actual = create_test_context("uauauauaua"); let expected = "[uauauauau[a"; assert_eq!(actual, expected); @@ -217,10 +223,8 @@ mod tests { } #[test] - fn test_multiple_system_messages_only_first_cached() { - // This test assumes multiple system messages are possible, but only first is - // cached - let context = Context { + fn test_multiple_system_messages_all_cached() { + let fixture = Context { conversation_id: None, messages: vec![ ContextMessage::Text(TextMessage::new(Role::System, "first")).into(), @@ -243,16 +247,21 @@ mod tests { initiator: None, }; - let request = Request::try_from(context).expect("Failed to convert context to request"); + let request = Request::try_from(fixture).expect("Failed to convert context to request"); let mut transformer = SetCache; let request = transformer.transform(request); - // Check that only first system message is cached - let system_messages = request.system.as_ref().unwrap(); - assert_eq!(system_messages[0].is_cached(), true); - assert_eq!(system_messages[1].is_cached(), false); - - // Check that last conversation message is cached - assert_eq!(request.get_messages().last().unwrap().is_cached(), true); + let expected = vec![true, true]; + let actual = request + .system + .as_ref() + .unwrap() + .iter() + .map(|message| message.is_cached()) + .collect::>(); + assert_eq!(actual, expected); + assert!(request.get_messages()[0].is_cached()); } + } + diff --git a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap index fc39e6fa12..c3bebe419e 100644 --- a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap +++ b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap @@ -449,10 +449,10 @@ Launch a new agent to handle complex, multi-step tasks autonomously. The task tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. Available agent types and the tools they have access to: -- **sage**: Specialized in researching codebases - - Tools: read, fs_search, sem_search, fetch - **debug**: Specialized in debugging issues - Tools: read, shell, fs_search, sem_search, fetch +- **sage**: Specialized in researching codebases + - Tools: read, fs_search, sem_search, fetch When using the task tool, you must specify a agent_id parameter to select which agent type to use. diff --git a/crates/forge_app/src/tool_registry.rs b/crates/forge_app/src/tool_registry.rs index 0db7741760..b923528059 100644 --- a/crates/forge_app/src/tool_registry.rs +++ b/crates/forge_app/src/tool_registry.rs @@ -291,6 +291,8 @@ impl ToolRegistry { use crate::TemplateEngine; let handlebars = TemplateEngine::handlebar_instance(); + let mut agents = agents; + agents.sort_by(|left, right| left.id.as_str().cmp(right.id.as_str())); // Build tool_names map from all available tools let tool_names: Map = ToolCatalog::iter() @@ -710,6 +712,41 @@ mod tests { ); assert!(actual.iter().all(|t| t.name.as_str() != "sem_search")); } + + #[test] + fn test_task_tool_description_is_stable_across_agent_order() { + use fake::{Fake, Faker}; + let env: Environment = Faker.fake(); + let template_config = TemplateConfig::default(); + let agents = create_test_agents(); + let mut reversed_agents = agents.clone(); + reversed_agents.reverse(); + + let fixture = + ToolRegistry::<()>::get_system_tools(true, &env, None, agents, &template_config); + let actual = ToolRegistry::<()>::get_system_tools( + true, + &env, + None, + reversed_agents, + &template_config, + ); + + let expected = fixture + .iter() + .find(|tool| tool.name.as_str() == "task") + .expect("Task tool should exist") + .description + .clone(); + let actual = actual + .iter() + .find(|tool| tool.name.as_str() == "task") + .expect("Task tool should exist") + .description + .clone(); + + assert_eq!(actual, expected); + } } #[cfg(test)] diff --git a/crates/forge_repo/src/skill.rs b/crates/forge_repo/src/skill.rs index 50b0b3a83f..7a5aa71208 100644 --- a/crates/forge_repo/src/skill.rs +++ b/crates/forge_repo/src/skill.rs @@ -1,3 +1,4 @@ +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::Context; @@ -108,7 +109,7 @@ impl SkillR .map(|skill| self.render_skill(skill, &env)) .collect::>(); - Ok(rendered_skills) + Ok(sort_skills(rendered_skills)) } } @@ -131,7 +132,7 @@ impl ForgeS .with_context(|| format!("Failed to list directory: {}", dir.display()))?; // Filter for directories only (entries that end with '/') - let subdirs: Vec<_> = entries + let mut subdirs: Vec<_> = entries .into_iter() .filter_map(|walked| { if walked.is_dir() && !walked.path.is_empty() { @@ -142,6 +143,7 @@ impl ForgeS } }) .collect(); + sort_paths(&mut subdirs); // Read SKILL.md from each subdirectory in parallel let futures = subdirs.into_iter().map(|subdir| { @@ -163,7 +165,7 @@ impl ForgeS // Get all resource files in the skill directory recursively let walker = Walker::unlimited().cwd(subdir.clone()); - let resources = infra + let mut resources = infra .walk(walker) .await .unwrap_or_default() @@ -182,6 +184,7 @@ impl ForgeS } }) .collect::>(); + sort_paths(&mut resources); // Try to extract skill from front matter, otherwise create with // directory name @@ -303,6 +306,30 @@ fn resolve_skill_conflicts(skills: Vec) -> Vec { result } +fn sort_skills(mut skills: Vec) -> Vec { + for skill in &mut skills { + sort_paths(&mut skill.resources); + } + + skills.sort_by(|a, b| { + a.name + .cmp(&b.name) + .then_with(|| path_sort_key(a.path.as_deref()).cmp(&path_sort_key(b.path.as_deref()))) + .then_with(|| a.description.cmp(&b.description)) + }); + + skills +} + +fn sort_paths(paths: &mut [PathBuf]) { + paths.sort_by(|a, b| path_sort_key(Some(a.as_path())).cmp(&path_sort_key(Some(b.as_path())))); +} + +fn path_sort_key(path: Option<&Path>) -> String { + path.map(|path| path.to_string_lossy().into_owned()) + .unwrap_or_default() +} + #[cfg(test)] mod tests { use forge_config::ForgeConfig; @@ -343,6 +370,35 @@ mod tests { assert_eq!(actual[1].name, "skill2"); } + #[test] + fn test_sort_skills_orders_names_and_resources() { + // Fixture + let fixture = vec![ + Skill::new("zeta", "prompt", "desc") + .path("/tmp/zeta/SKILL.md") + .resources(vec![ + PathBuf::from("/tmp/zeta/b.txt"), + PathBuf::from("/tmp/zeta/a.txt"), + ]), + Skill::new("alpha", "prompt", "desc").path("/tmp/alpha/SKILL.md"), + ]; + + // Act + let actual = sort_skills(fixture); + + // Assert + let expected = vec![ + Skill::new("alpha", "prompt", "desc").path("/tmp/alpha/SKILL.md"), + Skill::new("zeta", "prompt", "desc") + .path("/tmp/zeta/SKILL.md") + .resources(vec![ + PathBuf::from("/tmp/zeta/a.txt"), + PathBuf::from("/tmp/zeta/b.txt"), + ]), + ]; + assert_eq!(actual, expected); + } + #[test] fn test_load_builtin_skills() { // Fixture @@ -439,6 +495,13 @@ mod tests { // Assert - should load all skills assert_eq!(actual.len(), 2); // minimal-skill, test-skill + assert_eq!( + actual + .iter() + .map(|skill| skill.name.as_str()) + .collect::>(), + vec!["minimal-skill", "test-skill"] + ); // Verify skill with no resources let minimal_skill = actual.iter().find(|s| s.name == "minimal-skill").unwrap(); @@ -448,25 +511,21 @@ mod tests { let test_skill = actual.iter().find(|s| s.name == "test-skill").unwrap(); assert_eq!(test_skill.description, "A test skill with resources"); assert_eq!(test_skill.resources.len(), 3); // file_1.txt, foo/file_2.txt, foo/bar/file_3.txt - - // Verify nested directory structure is captured - assert!( - test_skill - .resources - .iter() - .any(|p| p.ends_with("file_1.txt")) - ); - assert!( - test_skill - .resources - .iter() - .any(|p| p.ends_with("foo/file_2.txt")) - ); - assert!( + assert_eq!( test_skill .resources .iter() - .any(|p| p.ends_with("foo/bar/file_3.txt")) + .map(|path| path + .strip_prefix(&skill_dir) + .unwrap() + .to_string_lossy() + .to_string()) + .collect::>(), + vec![ + "test-skill/file_1.txt".to_string(), + "test-skill/foo/bar/file_3.txt".to_string(), + "test-skill/foo/file_2.txt".to_string(), + ] ); // Ensure SKILL.md is never included in resources From d0f6cc8bb2eee3c6daa7105a04ab98742034c9af Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:36:05 +0000 Subject: [PATCH 2/3] [autofix.ci] apply automated fixes --- .../src/dto/anthropic/transforms/set_cache.rs | 14 ++++++-------- crates/forge_repo/src/skill.rs | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs index 8bcccbb111..59b48a4d7d 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs @@ -3,9 +3,10 @@ use forge_domain::Transformer; use crate::dto::anthropic::Request; /// Transformer that keeps Anthropic prompt-cache markers stable: -/// - Always caches every system message so the static system prefix remains reusable -/// - Falls back to caching the first conversation message when there is no system -/// prompt so single-turn requests still establish a reusable prefix +/// - Always caches every system message so the static system prefix remains +/// reusable +/// - Falls back to caching the first conversation message when there is no +/// system prompt so single-turn requests still establish a reusable prefix /// - Uses exactly one rolling message-level marker on the newest message pub struct SetCache; @@ -39,11 +40,10 @@ impl Transformer for SetCache { *message = std::mem::take(message).cached(false); } - if !has_system_prompt && len > 0 { - if let Some(first_message) = request.get_messages_mut().first_mut() { + if !has_system_prompt && len > 0 + && let Some(first_message) = request.get_messages_mut().first_mut() { *first_message = std::mem::take(first_message).cached(true); } - } if let Some(message) = request.get_messages_mut().last_mut() { *message = std::mem::take(message).cached(true); @@ -262,6 +262,4 @@ mod tests { assert_eq!(actual, expected); assert!(request.get_messages()[0].is_cached()); } - } - diff --git a/crates/forge_repo/src/skill.rs b/crates/forge_repo/src/skill.rs index 7a5aa71208..1b598b0fb6 100644 --- a/crates/forge_repo/src/skill.rs +++ b/crates/forge_repo/src/skill.rs @@ -322,7 +322,7 @@ fn sort_skills(mut skills: Vec) -> Vec { } fn sort_paths(paths: &mut [PathBuf]) { - paths.sort_by(|a, b| path_sort_key(Some(a.as_path())).cmp(&path_sort_key(Some(b.as_path())))); + paths.sort_by_key(|a| path_sort_key(Some(a.as_path()))); } fn path_sort_key(path: Option<&Path>) -> String { From b90c71c9ba9a6d010f102ded41ada3618f0b4773 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:37:54 +0000 Subject: [PATCH 3/3] [autofix.ci] apply automated fixes (attempt 2/3) --- .../src/dto/anthropic/transforms/set_cache.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs index 59b48a4d7d..0f1598bec7 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs @@ -40,10 +40,12 @@ impl Transformer for SetCache { *message = std::mem::take(message).cached(false); } - if !has_system_prompt && len > 0 - && let Some(first_message) = request.get_messages_mut().first_mut() { - *first_message = std::mem::take(first_message).cached(true); - } + if !has_system_prompt + && len > 0 + && let Some(first_message) = request.get_messages_mut().first_mut() + { + *first_message = std::mem::take(first_message).cached(true); + } if let Some(message) = request.get_messages_mut().last_mut() { *message = std::mem::take(message).cached(true);