From 8c32786046497f12436ef780bd676be7dec5352d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= <mati@fastmail.com>
Date: Thu, 13 Nov 2025 06:39:23 +0100
Subject: [PATCH 1/3] Fix llama3.1 generating a non-conforming tool payload

---
 src/bin/qa.rs | 107 +++++++++++++++++++++++++++++++++++++++-----------
 src/prompt.rs |   2 +-
 2 files changed, 84 insertions(+), 25 deletions(-)
diff --git a/src/bin/qa.rs b/src/bin/qa.rs
index 78b8ac2..b008fe4 100644
--- a/src/bin/qa.rs
+++ b/src/bin/qa.rs
@@ -321,6 +321,9 @@ async fn main() -> Result<()> {
             }
         }
         AssistantReply::Content(assistant) => {
+            if cli.debug {
+                eprintln!("[debug] assistant raw: {}", assistant);
+            }
             // Try to parse as a tool call per our plain-JSON protocol; else print the answer.
             match parse_tool_call(assistant.trim()) {
                 Ok(call) => match call {
@@ -528,36 +531,42 @@ async fn execute_tool_call(
     cfg: &mut Config,
     cfg_path: &Path,
 ) -> Result<bool> {
+    if debug {
+        eprintln!("[debug] tool call '{}' args: {}", name, arguments_json);
+    }
     let mut current_name = name.to_string();
     let mut current_args = arguments_json.to_string();
 
     loop {
-        match current_name.as_str() {
-            "read_file" => {
-                let args: qqqa::tools::read_file::Args = serde_json::from_str(&current_args)
-                    .map_err(|e| anyhow!("Failed to parse read_file args: {}", e))?;
-                match qqqa::tools::read_file::run(args) {
-                    Ok(content) => print_tool_result("read_file", &content),
-                    Err(e) => print_tool_error("read_file", &e.to_string()),
-                }
-                return Ok(true);
+    match current_name.as_str() {
+        "read_file" => {
+            let normalized = normalize_tool_arguments(&current_args)?;
+            let args: qqqa::tools::read_file::Args = serde_json::from_str(&normalized)
+                .map_err(|e| anyhow!("Failed to parse read_file args: {}", e))?;
+            match qqqa::tools::read_file::run(args) {
+                Ok(content) => print_tool_result("read_file", &content),
+                Err(e) => print_tool_error("read_file", &e.to_string()),
             }
-            "write_file" => {
-                let args: qqqa::tools::write_file::Args = serde_json::from_str(&current_args)
-                    .map_err(|e| anyhow!("Failed to parse write_file args: {}", e))?;
-                match qqqa::tools::write_file::run(args) {
-                    Ok(summary) => print_tool_result("write_file", &summary),
-                    Err(e) => print_tool_error("write_file", &e.to_string()),
-                }
-                return Ok(true);
+            return Ok(true);
+        }
+        "write_file" => {
+            let normalized = normalize_tool_arguments(&current_args)?;
+            let args: qqqa::tools::write_file::Args = serde_json::from_str(&normalized)
+                .map_err(|e| anyhow!("Failed to parse write_file args: {}", e))?;
+            match qqqa::tools::write_file::run(args) {
+                Ok(summary) => print_tool_result("write_file", &summary),
+                Err(e) => print_tool_error("write_file", &e.to_string()),
             }
-            "execute_command" => {
-                let args: qqqa::tools::execute_command::Args = serde_json::from_str(&current_args)
-                    .map_err(|e| anyhow!("Failed to parse execute_command args: {}", e))?;
-                match run_execute_command_with_allowlist(
-                    args, auto_yes, debug, shell, cfg, cfg_path,
-                )
-                .await
+            return Ok(true);
+        }
+        "execute_command" => {
+            let normalized = normalize_tool_arguments(&current_args)?;
+            let args: qqqa::tools::execute_command::Args = serde_json::from_str(&normalized)
+                .map_err(|e| anyhow!("Failed to parse execute_command args: {}", e))?;
+            match run_execute_command_with_allowlist(
+                args, auto_yes, debug, shell, cfg, cfg_path,
+            )
+            .await
                 {
                     Ok(result) => print_execute_command_result(&result, debug),
                     Err(e) => print_tool_error("execute_command", &e.to_string()),
@@ -593,6 +602,27 @@ fn print_tool_error(tool: &str, err: &str) {
     println!("[tool:{}:error] {}", tool, err);
 }
 
+fn normalize_tool_arguments(raw: &str) -> Result<String> {
+    // If the raw blob already matches our expected schema, leave it alone.
+    // Otherwise, detect legacy wrappers of the shape {"tool": ..., "arguments": {...}}
+    // and unwrap them so serde can deserialize into the tool arg structs.
+    #[derive(serde::Deserialize)]
+    struct LegacyWrapper {
+        tool: Option<String>,
+        arguments: serde_json::Value,
+    }
+
+    let trimmed = raw.trim_start();
+    if !trimmed.starts_with('{') {
+        return Ok(raw.to_string());
+    }
+
+    match serde_json::from_str::<LegacyWrapper>(trimmed) {
+        Ok(wrapper) if wrapper.tool.is_some() => Ok(wrapper.arguments.to_string()),
+        _ => Ok(raw.to_string()),
+    }
+}
+
 fn print_execute_command_result(result: &ExecuteCommandResult, debug: bool) {
     if let Some(msg) = format_execute_command_result(result, debug) {
         print_tool_result("execute_command", &msg);
@@ -785,4 +815,33 @@ mod tests {
         assert!(msg.contains("Exit code: 1"));
         assert!(msg.contains("--- stderr ---"));
     }
+
+    #[test]
+    fn normalize_tool_arguments_unwraps_legacy_wrapper() {
+        let raw = r#"{"tool":"execute_command","arguments":{"command":"ls"}}"#;
+        let normalized = normalize_tool_arguments(raw).expect("normalize");
+        assert_eq!(normalized, "{\"command\":\"ls\"}");
+    }
+
+    #[test]
+    fn normalize_tool_arguments_leaves_openai_function_call_intact() {
+        // This is the standard payload we get from OpenAI-style function calls.
+        let raw = r#"{"command":"ls -al","cwd":"."}"#;
+        let normalized = normalize_tool_arguments(raw).expect("normalize");
+        assert_eq!(normalized, raw);
+    }
+
+    #[test]
+    fn normalize_tool_arguments_passthrough_for_standard_schema() {
+        let raw = r#"{\"command\":\"ls\",\"cwd\":\".\"}"#;
+        let normalized = normalize_tool_arguments(raw).expect("normalize");
+        assert_eq!(normalized, raw);
+    }
+
+    #[test]
+    fn normalize_tool_arguments_handles_non_json_gracefully() {
+        let raw = "not-json";
+        let normalized = normalize_tool_arguments(raw).expect("normalize");
+        assert_eq!(normalized, raw);
+    }
 }
diff --git a/src/prompt.rs b/src/prompt.rs
index e12be95..e4b4230 100644
--- a/src/prompt.rs
+++ b/src/prompt.rs
@@ -203,7 +203,7 @@ pub fn build_qa_system_prompt() -> String {
     s.push_str("- execute_command: { \"command\": string, \"cwd?\": string }\n\n");
     s.push_str("Rules:\n");
     s.push_str("- Single step: at most one tool call.\n");
-    s.push_str("- If using a tool, return ONLY the JSON object (no prose).\n");
+    s.push_str("- If using a tool, return ONLY the JSON object (no prose) and fill the function parameters exactly as declared (no extra wrapper objects).\n");
     s.push_str("- Prefer safe, non-destructive commands.\n");
     s
 }

From 7a0bc4b0d6359e0a62dc28bd8e49cbea44805a85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= <mati@fastmail.com>
Date: Thu, 13 Nov 2025 06:43:03 +0100
Subject: [PATCH 2/3] Remove redundant line

---
 src/bin/qa.rs | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/src/bin/qa.rs b/src/bin/qa.rs
index b008fe4..d559216 100644
--- a/src/bin/qa.rs
+++ b/src/bin/qa.rs
@@ -633,20 +633,9 @@ fn format_execute_command_result(
     result: &ExecuteCommandResult,
     debug: bool,
 ) -> Option<String> {
+    let _ = debug;
     if result.streamed_live {
-        if debug {
-            let exit_line = result
-                .summary
-                .lines()
-                .next()
-                .unwrap_or("Exit code: (unknown)");
-            Some(format!(
-                "{}\nOutput streamed above; not repeating stdout/stderr.",
-                exit_line
-            ))
-        } else {
-            None
-        }
+        None
     } else {
         Some(result.summary.trim_end().to_string())
     }
@@ -792,17 +781,7 @@ mod tests {
             streamed_live: true,
         };
         assert!(format_execute_command_result(&result, false).is_none());
-    }
-
-    #[test]
-    fn format_execute_command_result_emits_condensed_when_debug() {
-        let result = ExecuteCommandResult {
-            summary: "Exit code: 0\n--- stdout ---\nok\n--- stderr ---\n".into(),
-            streamed_live: true,
-        };
-        let msg = format_execute_command_result(&result, true).expect("should emit");
-        assert!(msg.contains("Exit code: 0"));
-        assert!(msg.contains("Output streamed above"));
+        assert!(format_execute_command_result(&result, true).is_none());
     }
 
     #[test]

From 3aa3e17722038b4a72745aa07cecd94ba52a21fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C3=B3jka?= <mati@fastmail.com>
Date: Thu, 13 Nov 2025 06:57:10 +0100
Subject: [PATCH 3/3] Improve prompt to stabilize qa tool calling

---
 src/prompt.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/prompt.rs b/src/prompt.rs
index e4b4230..3444314 100644
--- a/src/prompt.rs
+++ b/src/prompt.rs
@@ -93,7 +93,7 @@ pub fn build_qq_prompt(
 /// System prompt for `qq`: restrict topics to terminal/dev and enforce XML-tag formatting.
 pub fn build_qq_system_prompt() -> String {
     let mut s = String::new();
-    s.push_str("Provide a quick answer / no thinking.\n");
+    s.push_str("Balance quick answer with just enough thinking.\n");
     s.push_str("You are a terminal assistant. Help users ONLY with command-line, programming, system administration, and technical computing tasks.\n\n");
     s.push_str("TOPIC RESTRICTIONS:\n");
     s.push_str("- ONLY answer questions about: terminal commands, shell scripting, file operations, system administration, programming, development tools, git, network tools, text processing, etc.\n");
@@ -193,9 +193,9 @@ pub fn build_qq_user_message(
 /// The CLI enforces a single tool call and will not loop.
 pub fn build_qa_system_prompt() -> String {
     let mut s = String::new();
-    s.push_str("Provide a quick answer / no thinking.\n");
+    s.push_str("Balance quick answer with just enough thinking.\n");
     s.push_str("You are a careful CLI agent with a single tool-call step.\n");
-    s.push_str("You may either answer directly in plain text OR request exactly one tool call by returning ONLY a JSON object with this shape:\n");
+    s.push_str("You must satisfy every user request by issuing exactly one tool call, returned as a JSON object with this shape:\n");
     s.push_str("{ \"tool\": string, \"arguments\": object }\n\n");
     s.push_str("Available tools and JSON argument schemas:\n");
     s.push_str("- read_file: { \"path\": string }\n");
@@ -203,6 +203,7 @@ pub fn build_qa_system_prompt() -> String {
     s.push_str("- execute_command: { \"command\": string, \"cwd?\": string }\n\n");
     s.push_str("Rules:\n");
     s.push_str("- Single step: at most one tool call.\n");
+    s.push_str("- When the user asks you to run or inspect something, you must call the appropriate tool (usually execute_command) to gather real output; if running a command would be unsafe or impossible, call the `json` tool and explain why instead of replying as plain text.\n");
     s.push_str("- If using a tool, return ONLY the JSON object (no prose) and fill the function parameters exactly as declared (no extra wrapper objects).\n");
     s.push_str("- Prefer safe, non-destructive commands.\n");
     s