diff --git a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs index 7380824cb4..0f1598bec7 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/set_cache.rs @@ -2,20 +2,21 @@ use forge_domain::Transformer; use crate::dto::anthropic::Request; -/// Transformer that implements a simple two-breakpoint cache strategy: -/// - Always caches the first message in the conversation -/// - Always caches the last message in the conversation -/// - Removes cache control from the second-to-last message +/// Transformer that keeps Anthropic prompt-cache markers stable: +/// - Always caches every system message so the static system prefix remains +/// reusable +/// - Falls back to caching the first conversation message when there is no +/// system prompt so single-turn requests still establish a reusable prefix +/// - Uses exactly one rolling message-level marker on the newest message pub struct SetCache; impl Transformer for SetCache { type Value = Request; - /// Implements a simple two-breakpoint cache strategy: - /// 1. Cache the first system message as it should be static. - /// 2. Cache the last message (index messages.len() - 1) - /// 3. Remove cache control from second-to-last message (index - /// messages.len() - 2) + /// Applies the default Anthropic cache strategy: + /// 1. Cache every system message when present, otherwise cache the first + /// conversation message. + /// 2. Cache only the last message as the rolling message-level marker. fn transform(&mut self, mut request: Self::Value) -> Self::Value { let len = request.get_messages().len(); let sys_len = request.system.as_ref().map_or(0, |msgs| msgs.len()); @@ -24,21 +25,28 @@ impl Transformer for SetCache { return request; } - // Cache the very first system message, ideally you should keep static content - // in it. - if let Some(system_messages) = request.system.as_mut() - && let Some(first_message) = system_messages.first_mut() + let has_system_prompt = request + .system + .as_ref() + .is_some_and(|messages| !messages.is_empty()); + + if let Some(system_messages) = request.system.as_mut() { + for message in system_messages.iter_mut() { + *message = std::mem::take(message).cached(true); + } + } + + for message in request.get_messages_mut().iter_mut() { + *message = std::mem::take(message).cached(false); + } + + if !has_system_prompt + && len > 0 + && let Some(first_message) = request.get_messages_mut().first_mut() { *first_message = std::mem::take(first_message).cached(true); - } else { - // If no system messages, we can still cache the first message in the - // conversation. - if let Some(first_message) = request.get_messages_mut().first_mut() { - *first_message = std::mem::take(first_message).cached(true); - } } - // Add cache control to last message (if different from first) if let Some(message) = request.get_messages_mut().last_mut() { *message = std::mem::take(message).cached(true); } @@ -161,28 +169,28 @@ mod tests { } #[test] - fn test_three_messages_only_last_cached() { + fn test_three_messages_cache_first_and_last_only() { let actual = create_test_context("uau"); let expected = "[ua[u"; assert_eq!(actual, expected); } #[test] - fn test_four_messages_only_last_cached() { + fn test_four_messages_cache_first_and_last_only() { let actual = create_test_context("uaua"); let expected = "[uau[a"; assert_eq!(actual, expected); } #[test] - fn test_five_messages_only_last_cached() { + fn test_five_messages_cache_first_and_last_only() { let actual = create_test_context("uauau"); let expected = "[uaua[u"; assert_eq!(actual, expected); } #[test] - fn test_longer_conversation() { + fn test_longer_conversation_caches_first_and_last_only() { let actual = create_test_context("uauauauaua"); let expected = "[uauauauau[a"; assert_eq!(actual, expected); @@ -217,10 +225,8 @@ mod tests { } #[test] - fn test_multiple_system_messages_only_first_cached() { - // This test assumes multiple system messages are possible, but only first is - // cached - let context = Context { + fn test_multiple_system_messages_all_cached() { + let fixture = Context { conversation_id: None, messages: vec![ ContextMessage::Text(TextMessage::new(Role::System, "first")).into(), @@ -243,16 +249,19 @@ mod tests { initiator: None, }; - let request = Request::try_from(context).expect("Failed to convert context to request"); + let request = Request::try_from(fixture).expect("Failed to convert context to request"); let mut transformer = SetCache; let request = transformer.transform(request); - // Check that only first system message is cached - let system_messages = request.system.as_ref().unwrap(); - assert_eq!(system_messages[0].is_cached(), true); - assert_eq!(system_messages[1].is_cached(), false); - - // Check that last conversation message is cached - assert_eq!(request.get_messages().last().unwrap().is_cached(), true); + let expected = vec![true, true]; + let actual = request + .system + .as_ref() + .unwrap() + .iter() + .map(|message| message.is_cached()) + .collect::>(); + assert_eq!(actual, expected); + assert!(request.get_messages()[0].is_cached()); } } diff --git a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap index fc39e6fa12..c3bebe419e 100644 --- a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap +++ b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap @@ -449,10 +449,10 @@ Launch a new agent to handle complex, multi-step tasks autonomously. The task tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. Available agent types and the tools they have access to: -- **sage**: Specialized in researching codebases - - Tools: read, fs_search, sem_search, fetch - **debug**: Specialized in debugging issues - Tools: read, shell, fs_search, sem_search, fetch +- **sage**: Specialized in researching codebases + - Tools: read, fs_search, sem_search, fetch When using the task tool, you must specify a agent_id parameter to select which agent type to use. diff --git a/crates/forge_app/src/tool_registry.rs b/crates/forge_app/src/tool_registry.rs index 0db7741760..b923528059 100644 --- a/crates/forge_app/src/tool_registry.rs +++ b/crates/forge_app/src/tool_registry.rs @@ -291,6 +291,8 @@ impl ToolRegistry { use crate::TemplateEngine; let handlebars = TemplateEngine::handlebar_instance(); + let mut agents = agents; + agents.sort_by(|left, right| left.id.as_str().cmp(right.id.as_str())); // Build tool_names map from all available tools let tool_names: Map = ToolCatalog::iter() @@ -710,6 +712,41 @@ mod tests { ); assert!(actual.iter().all(|t| t.name.as_str() != "sem_search")); } + + #[test] + fn test_task_tool_description_is_stable_across_agent_order() { + use fake::{Fake, Faker}; + let env: Environment = Faker.fake(); + let template_config = TemplateConfig::default(); + let agents = create_test_agents(); + let mut reversed_agents = agents.clone(); + reversed_agents.reverse(); + + let fixture = + ToolRegistry::<()>::get_system_tools(true, &env, None, agents, &template_config); + let actual = ToolRegistry::<()>::get_system_tools( + true, + &env, + None, + reversed_agents, + &template_config, + ); + + let expected = fixture + .iter() + .find(|tool| tool.name.as_str() == "task") + .expect("Task tool should exist") + .description + .clone(); + let actual = actual + .iter() + .find(|tool| tool.name.as_str() == "task") + .expect("Task tool should exist") + .description + .clone(); + + assert_eq!(actual, expected); + } } #[cfg(test)] diff --git a/crates/forge_repo/src/skill.rs b/crates/forge_repo/src/skill.rs index 50b0b3a83f..1b598b0fb6 100644 --- a/crates/forge_repo/src/skill.rs +++ b/crates/forge_repo/src/skill.rs @@ -1,3 +1,4 @@ +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::Context; @@ -108,7 +109,7 @@ impl SkillR .map(|skill| self.render_skill(skill, &env)) .collect::>(); - Ok(rendered_skills) + Ok(sort_skills(rendered_skills)) } } @@ -131,7 +132,7 @@ impl ForgeS .with_context(|| format!("Failed to list directory: {}", dir.display()))?; // Filter for directories only (entries that end with '/') - let subdirs: Vec<_> = entries + let mut subdirs: Vec<_> = entries .into_iter() .filter_map(|walked| { if walked.is_dir() && !walked.path.is_empty() { @@ -142,6 +143,7 @@ impl ForgeS } }) .collect(); + sort_paths(&mut subdirs); // Read SKILL.md from each subdirectory in parallel let futures = subdirs.into_iter().map(|subdir| { @@ -163,7 +165,7 @@ impl ForgeS // Get all resource files in the skill directory recursively let walker = Walker::unlimited().cwd(subdir.clone()); - let resources = infra + let mut resources = infra .walk(walker) .await .unwrap_or_default() @@ -182,6 +184,7 @@ impl ForgeS } }) .collect::>(); + sort_paths(&mut resources); // Try to extract skill from front matter, otherwise create with // directory name @@ -303,6 +306,30 @@ fn resolve_skill_conflicts(skills: Vec) -> Vec { result } +fn sort_skills(mut skills: Vec) -> Vec { + for skill in &mut skills { + sort_paths(&mut skill.resources); + } + + skills.sort_by(|a, b| { + a.name + .cmp(&b.name) + .then_with(|| path_sort_key(a.path.as_deref()).cmp(&path_sort_key(b.path.as_deref()))) + .then_with(|| a.description.cmp(&b.description)) + }); + + skills +} + +fn sort_paths(paths: &mut [PathBuf]) { + paths.sort_by_key(|a| path_sort_key(Some(a.as_path()))); +} + +fn path_sort_key(path: Option<&Path>) -> String { + path.map(|path| path.to_string_lossy().into_owned()) + .unwrap_or_default() +} + #[cfg(test)] mod tests { use forge_config::ForgeConfig; @@ -343,6 +370,35 @@ mod tests { assert_eq!(actual[1].name, "skill2"); } + #[test] + fn test_sort_skills_orders_names_and_resources() { + // Fixture + let fixture = vec![ + Skill::new("zeta", "prompt", "desc") + .path("/tmp/zeta/SKILL.md") + .resources(vec![ + PathBuf::from("/tmp/zeta/b.txt"), + PathBuf::from("/tmp/zeta/a.txt"), + ]), + Skill::new("alpha", "prompt", "desc").path("/tmp/alpha/SKILL.md"), + ]; + + // Act + let actual = sort_skills(fixture); + + // Assert + let expected = vec![ + Skill::new("alpha", "prompt", "desc").path("/tmp/alpha/SKILL.md"), + Skill::new("zeta", "prompt", "desc") + .path("/tmp/zeta/SKILL.md") + .resources(vec![ + PathBuf::from("/tmp/zeta/a.txt"), + PathBuf::from("/tmp/zeta/b.txt"), + ]), + ]; + assert_eq!(actual, expected); + } + #[test] fn test_load_builtin_skills() { // Fixture @@ -439,6 +495,13 @@ mod tests { // Assert - should load all skills assert_eq!(actual.len(), 2); // minimal-skill, test-skill + assert_eq!( + actual + .iter() + .map(|skill| skill.name.as_str()) + .collect::>(), + vec!["minimal-skill", "test-skill"] + ); // Verify skill with no resources let minimal_skill = actual.iter().find(|s| s.name == "minimal-skill").unwrap(); @@ -448,25 +511,21 @@ mod tests { let test_skill = actual.iter().find(|s| s.name == "test-skill").unwrap(); assert_eq!(test_skill.description, "A test skill with resources"); assert_eq!(test_skill.resources.len(), 3); // file_1.txt, foo/file_2.txt, foo/bar/file_3.txt - - // Verify nested directory structure is captured - assert!( - test_skill - .resources - .iter() - .any(|p| p.ends_with("file_1.txt")) - ); - assert!( - test_skill - .resources - .iter() - .any(|p| p.ends_with("foo/file_2.txt")) - ); - assert!( + assert_eq!( test_skill .resources .iter() - .any(|p| p.ends_with("foo/bar/file_3.txt")) + .map(|path| path + .strip_prefix(&skill_dir) + .unwrap() + .to_string_lossy() + .to_string()) + .collect::>(), + vec![ + "test-skill/file_1.txt".to_string(), + "test-skill/foo/bar/file_3.txt".to_string(), + "test-skill/foo/file_2.txt".to_string(), + ] ); // Ensure SKILL.md is never included in resources