From 8c32786046497f12436ef780bd676be7dec5352d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= Date: Thu, 13 Nov 2025 06:39:23 +0100 Subject: [PATCH 1/3] Fix llama3.1 generating a non-conforming tool payload --- src/bin/qa.rs | 107 +++++++++++++++++++++++++++++++++++++++----------- src/prompt.rs | 2 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/bin/qa.rs b/src/bin/qa.rs index 78b8ac2..b008fe4 100644 --- a/src/bin/qa.rs +++ b/src/bin/qa.rs @@ -321,6 +321,9 @@ async fn main() -> Result<()> { } } AssistantReply::Content(assistant) => { + if cli.debug { + eprintln!("[debug] assistant raw: {}", assistant); + } // Try to parse as a tool call per our plain-JSON protocol; else print the answer. match parse_tool_call(assistant.trim()) { Ok(call) => match call { @@ -528,36 +531,42 @@ async fn execute_tool_call( cfg: &mut Config, cfg_path: &Path, ) -> Result { + if debug { + eprintln!("[debug] tool call '{}' args: {}", name, arguments_json); + } let mut current_name = name.to_string(); let mut current_args = arguments_json.to_string(); loop { - match current_name.as_str() { - "read_file" => { - let args: qqqa::tools::read_file::Args = serde_json::from_str(¤t_args) - .map_err(|e| anyhow!("Failed to parse read_file args: {}", e))?; - match qqqa::tools::read_file::run(args) { - Ok(content) => print_tool_result("read_file", &content), - Err(e) => print_tool_error("read_file", &e.to_string()), - } - return Ok(true); + match current_name.as_str() { + "read_file" => { + let normalized = normalize_tool_arguments(¤t_args)?; + let args: qqqa::tools::read_file::Args = serde_json::from_str(&normalized) + .map_err(|e| anyhow!("Failed to parse read_file args: {}", e))?; + match qqqa::tools::read_file::run(args) { + Ok(content) => print_tool_result("read_file", &content), + Err(e) => print_tool_error("read_file", &e.to_string()), } - "write_file" => { - let args: qqqa::tools::write_file::Args = serde_json::from_str(¤t_args) - .map_err(|e| anyhow!("Failed to parse write_file args: {}", e))?; - match qqqa::tools::write_file::run(args) { - Ok(summary) => print_tool_result("write_file", &summary), - Err(e) => print_tool_error("write_file", &e.to_string()), - } - return Ok(true); + return Ok(true); + } + "write_file" => { + let normalized = normalize_tool_arguments(¤t_args)?; + let args: qqqa::tools::write_file::Args = serde_json::from_str(&normalized) + .map_err(|e| anyhow!("Failed to parse write_file args: {}", e))?; + match qqqa::tools::write_file::run(args) { + Ok(summary) => print_tool_result("write_file", &summary), + Err(e) => print_tool_error("write_file", &e.to_string()), } - "execute_command" => { - let args: qqqa::tools::execute_command::Args = serde_json::from_str(¤t_args) - .map_err(|e| anyhow!("Failed to parse execute_command args: {}", e))?; - match run_execute_command_with_allowlist( - args, auto_yes, debug, shell, cfg, cfg_path, - ) - .await + return Ok(true); + } + "execute_command" => { + let normalized = normalize_tool_arguments(¤t_args)?; + let args: qqqa::tools::execute_command::Args = serde_json::from_str(&normalized) + .map_err(|e| anyhow!("Failed to parse execute_command args: {}", e))?; + match run_execute_command_with_allowlist( + args, auto_yes, debug, shell, cfg, cfg_path, + ) + .await { Ok(result) => print_execute_command_result(&result, debug), Err(e) => print_tool_error("execute_command", &e.to_string()), @@ -593,6 +602,27 @@ fn print_tool_error(tool: &str, err: &str) { println!("[tool:{}:error] {}", tool, err); } +fn normalize_tool_arguments(raw: &str) -> Result { + // If the raw blob already matches our expected schema, leave it alone. + // Otherwise, detect legacy wrappers of the shape {"tool": ..., "arguments": {...}} + // and unwrap them so serde can deserialize into the tool arg structs. + #[derive(serde::Deserialize)] + struct LegacyWrapper { + tool: Option, + arguments: serde_json::Value, + } + + let trimmed = raw.trim_start(); + if !trimmed.starts_with('{') { + return Ok(raw.to_string()); + } + + match serde_json::from_str::(trimmed) { + Ok(wrapper) if wrapper.tool.is_some() => Ok(wrapper.arguments.to_string()), + _ => Ok(raw.to_string()), + } +} + fn print_execute_command_result(result: &ExecuteCommandResult, debug: bool) { if let Some(msg) = format_execute_command_result(result, debug) { print_tool_result("execute_command", &msg); @@ -785,4 +815,33 @@ mod tests { assert!(msg.contains("Exit code: 1")); assert!(msg.contains("--- stderr ---")); } + + #[test] + fn normalize_tool_arguments_unwraps_legacy_wrapper() { + let raw = r#"{"tool":"execute_command","arguments":{"command":"ls"}}"#; + let normalized = normalize_tool_arguments(raw).expect("normalize"); + assert_eq!(normalized, "{\"command\":\"ls\"}"); + } + + #[test] + fn normalize_tool_arguments_leaves_openai_function_call_intact() { + // This is the standard payload we get from OpenAI-style function calls. + let raw = r#"{"command":"ls -al","cwd":"."}"#; + let normalized = normalize_tool_arguments(raw).expect("normalize"); + assert_eq!(normalized, raw); + } + + #[test] + fn normalize_tool_arguments_passthrough_for_standard_schema() { + let raw = r#"{\"command\":\"ls\",\"cwd\":\".\"}"#; + let normalized = normalize_tool_arguments(raw).expect("normalize"); + assert_eq!(normalized, raw); + } + + #[test] + fn normalize_tool_arguments_handles_non_json_gracefully() { + let raw = "not-json"; + let normalized = normalize_tool_arguments(raw).expect("normalize"); + assert_eq!(normalized, raw); + } } diff --git a/src/prompt.rs b/src/prompt.rs index e12be95..e4b4230 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -203,7 +203,7 @@ pub fn build_qa_system_prompt() -> String { s.push_str("- execute_command: { \"command\": string, \"cwd?\": string }\n\n"); s.push_str("Rules:\n"); s.push_str("- Single step: at most one tool call.\n"); - s.push_str("- If using a tool, return ONLY the JSON object (no prose).\n"); + s.push_str("- If using a tool, return ONLY the JSON object (no prose) and fill the function parameters exactly as declared (no extra wrapper objects).\n"); s.push_str("- Prefer safe, non-destructive commands.\n"); s } From 7a0bc4b0d6359e0a62dc28bd8e49cbea44805a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= Date: Thu, 13 Nov 2025 06:43:03 +0100 Subject: [PATCH 2/3] Remove redundant line --- src/bin/qa.rs | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/src/bin/qa.rs b/src/bin/qa.rs index b008fe4..d559216 100644 --- a/src/bin/qa.rs +++ b/src/bin/qa.rs @@ -633,20 +633,9 @@ fn format_execute_command_result( result: &ExecuteCommandResult, debug: bool, ) -> Option { + let _ = debug; if result.streamed_live { - if debug { - let exit_line = result - .summary - .lines() - .next() - .unwrap_or("Exit code: (unknown)"); - Some(format!( - "{}\nOutput streamed above; not repeating stdout/stderr.", - exit_line - )) - } else { - None - } + None } else { Some(result.summary.trim_end().to_string()) } @@ -792,17 +781,7 @@ mod tests { streamed_live: true, }; assert!(format_execute_command_result(&result, false).is_none()); - } - - #[test] - fn format_execute_command_result_emits_condensed_when_debug() { - let result = ExecuteCommandResult { - summary: "Exit code: 0\n--- stdout ---\nok\n--- stderr ---\n".into(), - streamed_live: true, - }; - let msg = format_execute_command_result(&result, true).expect("should emit"); - assert!(msg.contains("Exit code: 0")); - assert!(msg.contains("Output streamed above")); + assert!(format_execute_command_result(&result, true).is_none()); } #[test] From 3aa3e17722038b4a72745aa07cecd94ba52a21fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= Date: Thu, 13 Nov 2025 06:57:10 +0100 Subject: [PATCH 3/3] Improve prompt to stabilize qa tool calling --- src/prompt.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prompt.rs b/src/prompt.rs index e4b4230..3444314 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -93,7 +93,7 @@ pub fn build_qq_prompt( /// System prompt for `qq`: restrict topics to terminal/dev and enforce XML-tag formatting. pub fn build_qq_system_prompt() -> String { let mut s = String::new(); - s.push_str("Provide a quick answer / no thinking.\n"); + s.push_str("Balance quick answer with just enough thinking.\n"); s.push_str("You are a terminal assistant. Help users ONLY with command-line, programming, system administration, and technical computing tasks.\n\n"); s.push_str("TOPIC RESTRICTIONS:\n"); s.push_str("- ONLY answer questions about: terminal commands, shell scripting, file operations, system administration, programming, development tools, git, network tools, text processing, etc.\n"); @@ -193,9 +193,9 @@ pub fn build_qq_user_message( /// The CLI enforces a single tool call and will not loop. pub fn build_qa_system_prompt() -> String { let mut s = String::new(); - s.push_str("Provide a quick answer / no thinking.\n"); + s.push_str("Balance quick answer with just enough thinking.\n"); s.push_str("You are a careful CLI agent with a single tool-call step.\n"); - s.push_str("You may either answer directly in plain text OR request exactly one tool call by returning ONLY a JSON object with this shape:\n"); + s.push_str("You must satisfy every user request by issuing exactly one tool call, returned as a JSON object with this shape:\n"); s.push_str("{ \"tool\": string, \"arguments\": object }\n\n"); s.push_str("Available tools and JSON argument schemas:\n"); s.push_str("- read_file: { \"path\": string }\n"); @@ -203,6 +203,7 @@ pub fn build_qa_system_prompt() -> String { s.push_str("- execute_command: { \"command\": string, \"cwd?\": string }\n\n"); s.push_str("Rules:\n"); s.push_str("- Single step: at most one tool call.\n"); + s.push_str("- When the user asks you to run or inspect something, you must call the appropriate tool (usually execute_command) to gather real output; if running a command would be unsafe or impossible, call the `json` tool and explain why instead of replying as plain text.\n"); s.push_str("- If using a tool, return ONLY the JSON object (no prose) and fill the function parameters exactly as declared (no extra wrapper objects).\n"); s.push_str("- Prefer safe, non-destructive commands.\n"); s