From 7a14eb83f09e9ee5fc6fb066efb376e58942abb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20LIARD?= <ludwig@MacBook-Pro-de-Clement.local>
Date: Tue, 28 Apr 2026 23:26:35 +0200
Subject: [PATCH] fix(providers): tool-use translation correctness (3 audit
 bugs)

Surfaces silent translation failures and preserves client-supplied tool IDs
across the canonical->OpenAI / Anthropic upstream round trip.

* Bug #1 (tool_use serialization): replace `serde_json::to_string(...).unwrap_or_default()`
  in `extract_tool_calls` with proper error propagation. Adds `TransformError`
  enum (`ToolInputSerialization { tool_name, source }`) and a `From<TransformError>
  for ProviderError` bridge so existing callers continue to surface the failure
  as `ProviderError::SerializationError`.

* Bug #2 (tool ID round trip): introduces `OriginalToolIdMap` populated by
  `sanitize_tool_use_ids` before the upstream Anthropic call, and applied by
  `restore_original_tool_ids` on the response so downstream clients (e.g.
  Claude Code) can match response IDs back to their tool definitions even
  when the original ID violated Anthropic's `^[a-zA-Z0-9_-]{1,64}$` pattern.
  Map is scoped to the single provider call (no global state).

* Bug #3 (system ordering): `transform_request` now drops residual
  `role:"system"` entries from `request.messages` after hoisting the
  canonical `system` field, preventing duplicate system messages when a
  client sends `[user, system, assistant]` to the Anthropic-style endpoint
  and the request is routed to OpenAI.

Adds unit tests:
- `transform_returns_error_when_tool_input_unserializable`
- `transform_propagates_tool_input_error_to_provider_error`
- `transform_strips_system_from_messages_after_hoisting`
- `transform_strips_system_when_no_hoisted_system_field`
- `tool_use_id_round_trip_preserves_original`
- `restore_is_noop_when_map_is_empty`
- `restore_leaves_unmapped_ids_untouched`

Streaming round trip for tool IDs is left as a follow-up TODO; the common
non-streaming path is fixed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/providers/anthropic_compatible.rs |  39 +++-
 src/providers/anthropic_sanitize.rs   | 319 +++++++++++++++++++++++++-
 src/providers/openai/transform.rs     | 314 +++++++++++++++++++++++--
 3 files changed, 638 insertions(+), 34 deletions(-)
diff --git a/src/providers/anthropic_compatible.rs b/src/providers/anthropic_compatible.rs
index b3bd3fa3..8a5376be 100644
--- a/src/providers/anthropic_compatible.rs
+++ b/src/providers/anthropic_compatible.rs
@@ -87,7 +87,8 @@ fn log_rate_limits(headers: &HashMap<String, String>, provider: &str) {
 }
 
 use super::anthropic_sanitize::{
-    sanitize_tool_use_ids, strip_all_thinking_signatures, strip_non_anthropic_thinking,
+    restore_original_tool_ids, sanitize_tool_use_ids, strip_all_thinking_signatures,
+    strip_non_anthropic_thinking, OriginalToolIdMap,
 };
 
 /// Merges server-default beta features with client-provided ones, deduplicating.
@@ -275,17 +276,22 @@ impl AnthropicCompatibleProvider {
 
     /// Common setup for both send_message and send_message_stream:
     /// builds URL, sanitizes request for Anthropic backends, resolves auth.
+    ///
+    /// Returns a tuple of `(messages_url, auth_value, is_anthropic, id_map)`.
+    /// The `id_map` carries any rewrites applied by `sanitize_tool_use_ids`
+    /// so callers can restore the originals on the response.
     async fn prepare_anthropic_request(
         &self,
         request: &mut CanonicalRequest,
-    ) -> Result<(&str, String, bool), ProviderError> {
+    ) -> Result<(&str, String, bool, OriginalToolIdMap), ProviderError> {
         let is_anthropic = self.base.base_url.contains(ANTHROPIC_DOMAIN);
+        let mut id_map = OriginalToolIdMap::new();
         if is_anthropic {
-            sanitize_tool_use_ids(request);
+            sanitize_tool_use_ids(request, &mut id_map);
             strip_non_anthropic_thinking(request);
         }
         let auth_value = self.base.resolve_auth(OAuthConfig::anthropic).await?;
-        Ok((&self.messages_url, auth_value, is_anthropic))
+        Ok((&self.messages_url, auth_value, is_anthropic, id_map))
     }
 }
 
@@ -296,9 +302,10 @@ impl LlmProvider for AnthropicCompatibleProvider {
         request: CanonicalRequest,
     ) -> Result<ProviderResponse, ProviderError> {
         let mut request = request;
-        let (url, auth_value, is_anthropic) = self.prepare_anthropic_request(&mut request).await?;
+        let (url, auth_value, is_anthropic, id_map) =
+            self.prepare_anthropic_request(&mut request).await?;
 
-        let result = self.try_send_message(url, &auth_value, &request).await;
+        let mut result = self.try_send_message(url, &auth_value, &request).await;
 
         // Fallback: if signature error, strip all signed thinking blocks and retry
         if is_anthropic {
@@ -306,11 +313,18 @@ impl LlmProvider for AnthropicCompatibleProvider {
                 if message.contains("signature") {
                     tracing::warn!("🔄 Signature error from Anthropic: {}, stripping all signed thinking blocks and retrying", message);
                     strip_all_thinking_signatures(&mut request);
-                    return self.try_send_message(url, &auth_value, &request).await;
+                    result = self.try_send_message(url, &auth_value, &request).await;
                 }
             }
         }
 
+        // Restore original tool IDs so downstream clients can map response IDs
+        // back to the IDs they sent (audit Bug #2). No-op when sanitization
+        // didn't rewrite anything.
+        if let Ok(ref mut response) = result {
+            restore_original_tool_ids(response, &id_map);
+        }
+
         result
     }
 
@@ -356,7 +370,16 @@ impl LlmProvider for AnthropicCompatibleProvider {
         use futures::stream::TryStreamExt;
 
         let mut request = request;
-        let (url, auth_value, is_anthropic) = self.prepare_anthropic_request(&mut request).await?;
+        let (url, auth_value, is_anthropic, id_map) =
+            self.prepare_anthropic_request(&mut request).await?;
+        // NOTE: Streaming responses pass through unchanged; tool ID
+        // restoration on streamed `content_block_start` events would require
+        // SSE event rewriting and is intentionally out of scope for the
+        // initial Bug #2 fix. The non-streaming path covers the common case
+        // (single response per call). Future work: see TODO at restore call
+        // site below.
+        // TODO: implement SSE-time ID rewrite using `id_map` for streaming.
+        let _ = id_map;
 
         // Try request, fallback: strip all signed thinking blocks on signature error
         let response = match self
diff --git a/src/providers/anthropic_sanitize.rs b/src/providers/anthropic_sanitize.rs
index cd33d628..32736d43 100644
--- a/src/providers/anthropic_sanitize.rs
+++ b/src/providers/anthropic_sanitize.rs
@@ -2,6 +2,55 @@
 
 use super::constants::MIN_ANTHROPIC_SIGNATURE_LENGTH;
 use crate::models::{CanonicalRequest, ContentBlock, KnownContentBlock, MessageContent};
+use std::collections::HashMap;
+
+/// Bidirectional map between sanitized (canonical) tool IDs and the originals
+/// supplied by the client.
+///
+/// Anthropic enforces `^[a-zA-Z0-9_-]{1,64}$` on tool IDs, but downstream
+/// clients (e.g. Claude Code) often track the *original* IDs they sent — IDs
+/// minted by a previous OpenAI turn may include `.`, `:`, etc. When grob
+/// rewrites those IDs before calling an Anthropic backend, the response must
+/// echo the **original** form so the client can match its tool definitions.
+///
+/// Lookup is by canonical_id (the post-sanitization form that we send
+/// upstream), returning the original_id we owe the client.
+#[derive(Debug, Default, Clone)]
+pub(super) struct OriginalToolIdMap {
+    canonical_to_original: HashMap<String, String>,
+}
+
+impl OriginalToolIdMap {
+    /// Creates an empty map.
+    pub(super) fn new() -> Self {
+        Self::default()
+    }
+
+    /// Records a sanitization edit. No-op when the canonical form equals the
+    /// original (i.e. no rewrite was needed).
+    fn record(&mut self, canonical_id: &str, original_id: &str) {
+        if canonical_id == original_id {
+            return;
+        }
+        // Last writer wins on duplicate canonical IDs (rare; happens only if
+        // two distinct originals collapse to the same canonical form).
+        self.canonical_to_original
+            .insert(canonical_id.to_string(), original_id.to_string());
+    }
+
+    /// Returns the original ID for a sanitized canonical ID, if any.
+    pub(super) fn original_for(&self, canonical_id: &str) -> Option<&str> {
+        self.canonical_to_original
+            .get(canonical_id)
+            .map(String::as_str)
+    }
+
+    /// Returns true if no rewrites were recorded — callers can skip the
+    /// response walk entirely.
+    pub(super) fn is_empty(&self) -> bool {
+        self.canonical_to_original.is_empty()
+    }
+}
 
 // Thinking block signature handling for Anthropic
 //
@@ -103,9 +152,18 @@ fn remove_empty_messages(request: &mut CanonicalRequest) {
 }
 
 /// Sanitize tool_use.id and tool_use_id fields to match Anthropic's pattern requirement.
-/// Anthropic requires tool IDs to match: ^[a-zA-Z0-9_-]+
-/// Non-Anthropic providers may generate IDs with invalid characters.
-pub(super) fn sanitize_tool_use_ids(request: &mut CanonicalRequest) {
+///
+/// Anthropic requires tool IDs to match: `^[a-zA-Z0-9_-]+`. Non-Anthropic
+/// providers may generate IDs with invalid characters (e.g. OpenAI's
+/// `functions.Bash:0`). This function rewrites them in-place and records each
+/// (canonical, original) pair into `id_map` so the response path can restore
+/// the original IDs before returning to the client (audit Bug #2 — silent
+/// tool-result lookup failures when the client tries to match response IDs
+/// against IDs it sent).
+pub(super) fn sanitize_tool_use_ids(
+    request: &mut CanonicalRequest,
+    id_map: &mut OriginalToolIdMap,
+) {
     let mut sanitized_count = 0;
 
     for message in &mut request.messages {
@@ -116,6 +174,7 @@ pub(super) fn sanitize_tool_use_ids(request: &mut CanonicalRequest) {
                         let sanitized = sanitize_tool_id(id);
                         if sanitized != *id {
                             tracing::debug!("🔧 Sanitized tool_use.id: {} → {}", id, sanitized);
+                            id_map.record(&sanitized, id);
                             *block = ContentBlock::tool_use(sanitized, name.clone(), input.clone());
                             sanitized_count += 1;
                         }
@@ -133,6 +192,7 @@ pub(super) fn sanitize_tool_use_ids(request: &mut CanonicalRequest) {
                                 tool_use_id,
                                 sanitized
                             );
+                            id_map.record(&sanitized, tool_use_id);
                             *block = ContentBlock::Known(KnownContentBlock::ToolResult {
                                 tool_use_id: sanitized,
                                 content: content.clone(),
@@ -153,6 +213,60 @@ pub(super) fn sanitize_tool_use_ids(request: &mut CanonicalRequest) {
     }
 }
 
+/// Walks a provider response and replaces any sanitized (canonical) tool IDs
+/// with the originals captured in `id_map`.
+///
+/// Applies to both `tool_use` blocks (assistant turn) and `tool_result`
+/// blocks (rare in responses, but defended for completeness). No-op when the
+/// map is empty.
+pub(super) fn restore_original_tool_ids(
+    response: &mut crate::providers::ProviderResponse,
+    id_map: &OriginalToolIdMap,
+) {
+    if id_map.is_empty() {
+        return;
+    }
+
+    let mut restored_count = 0usize;
+    for block in response.content.iter_mut() {
+        match block {
+            ContentBlock::Known(KnownContentBlock::ToolUse { id, name, input }) => {
+                if let Some(original) = id_map.original_for(id) {
+                    tracing::debug!("🔁 Restored tool_use.id: {} → {}", id, original);
+                    *block =
+                        ContentBlock::tool_use(original.to_string(), name.clone(), input.clone());
+                    restored_count += 1;
+                }
+            }
+            ContentBlock::Known(KnownContentBlock::ToolResult {
+                tool_use_id,
+                content,
+                is_error,
+                cache_control,
+            }) => {
+                if let Some(original) = id_map.original_for(tool_use_id) {
+                    tracing::debug!("🔁 Restored tool_use_id: {} → {}", tool_use_id, original);
+                    *block = ContentBlock::Known(KnownContentBlock::ToolResult {
+                        tool_use_id: original.to_string(),
+                        content: content.clone(),
+                        is_error: *is_error,
+                        cache_control: cache_control.clone(),
+                    });
+                    restored_count += 1;
+                }
+            }
+            _ => {}
+        }
+    }
+
+    if restored_count > 0 {
+        tracing::info!(
+            "🔁 Restored {} original tool ID(s) on response",
+            restored_count
+        );
+    }
+}
+
 /// Sanitize a tool ID to match pattern ^[a-zA-Z0-9_-]+
 fn sanitize_tool_id(id: &str) -> String {
     id.chars()
@@ -165,3 +279,202 @@ fn sanitize_tool_id(id: &str) -> String {
         })
         .collect()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::models::{Message, ToolResultContent};
+    use crate::providers::{ProviderResponse, Usage};
+
+    fn user_message_with_blocks(blocks: Vec<ContentBlock>) -> Message {
+        Message {
+            role: "user".to_string(),
+            content: MessageContent::Blocks(blocks),
+        }
+    }
+
+    fn assistant_message_with_blocks(blocks: Vec<ContentBlock>) -> Message {
+        Message {
+            role: "assistant".to_string(),
+            content: MessageContent::Blocks(blocks),
+        }
+    }
+
+    fn empty_request() -> CanonicalRequest {
+        CanonicalRequest {
+            model: "claude-3-7-sonnet".to_string(),
+            messages: Vec::new(),
+            max_tokens: 100,
+            thinking: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            stop_sequences: None,
+            stream: None,
+            metadata: None,
+            system: None,
+            tools: None,
+            tool_choice: None,
+            extensions: Default::default(),
+        }
+    }
+
+    fn provider_response_with(blocks: Vec<ContentBlock>) -> ProviderResponse {
+        ProviderResponse {
+            id: "msg_x".to_string(),
+            r#type: "message".to_string(),
+            role: "assistant".to_string(),
+            content: blocks,
+            model: "claude-3-7-sonnet".to_string(),
+            stop_reason: Some("end_turn".to_string()),
+            stop_sequence: None,
+            usage: Usage {
+                input_tokens: 1,
+                output_tokens: 1,
+                cache_creation_input_tokens: None,
+                cache_read_input_tokens: None,
+            },
+        }
+    }
+
+    #[test]
+    fn tool_use_id_round_trip_preserves_original() {
+        // Bug #2: a client previously routed via OpenAI sends back tool IDs
+        // like `functions.Bash:0` (non-Anthropic). Grob sanitizes them for
+        // the upstream call, then must restore the originals on the response
+        // so the client can map response IDs to its tool definitions.
+        let original_use_id = "functions.Bash:0";
+        let original_result_id = "functions.Read:1";
+
+        let mut req = empty_request();
+        req.messages = vec![
+            assistant_message_with_blocks(vec![ContentBlock::tool_use(
+                original_use_id.to_string(),
+                "Bash".to_string(),
+                serde_json::json!({"command": "ls"}),
+            )]),
+            user_message_with_blocks(vec![ContentBlock::Known(KnownContentBlock::ToolResult {
+                tool_use_id: original_result_id.to_string(),
+                content: ToolResultContent::Text("done".to_string()),
+                is_error: false,
+                cache_control: None,
+            })]),
+        ];
+
+        let mut id_map = OriginalToolIdMap::new();
+        sanitize_tool_use_ids(&mut req, &mut id_map);
+
+        // Sanity: the request now carries canonical IDs.
+        let sanitized_use_id = "functions_Bash_0";
+        let sanitized_result_id = "functions_Read_1";
+        assert_eq!(
+            id_map.original_for(sanitized_use_id),
+            Some(original_use_id),
+            "use id mapping missing"
+        );
+        assert_eq!(
+            id_map.original_for(sanitized_result_id),
+            Some(original_result_id),
+            "result id mapping missing"
+        );
+        assert!(!id_map.is_empty());
+
+        // The upstream response echoes the canonical ID (Anthropic enforces
+        // the pattern and would reject the original) — restore it.
+        let mut response = provider_response_with(vec![
+            ContentBlock::tool_use(
+                sanitized_use_id.to_string(),
+                "Bash".to_string(),
+                serde_json::json!({"command": "ls"}),
+            ),
+            ContentBlock::Known(KnownContentBlock::ToolResult {
+                tool_use_id: sanitized_result_id.to_string(),
+                content: ToolResultContent::Text("ok".to_string()),
+                is_error: false,
+                cache_control: None,
+            }),
+        ]);
+
+        restore_original_tool_ids(&mut response, &id_map);
+
+        // After restoration the response carries the originals the client
+        // expects to round-trip with.
+        match &response.content[0] {
+            ContentBlock::Known(KnownContentBlock::ToolUse { id, .. }) => {
+                assert_eq!(id, original_use_id);
+            }
+            other => panic!("expected ToolUse, got {:?}", other),
+        }
+        match &response.content[1] {
+            ContentBlock::Known(KnownContentBlock::ToolResult { tool_use_id, .. }) => {
+                assert_eq!(tool_use_id, original_result_id);
+            }
+            other => panic!("expected ToolResult, got {:?}", other),
+        }
+    }
+
+    #[test]
+    fn restore_is_noop_when_map_is_empty() {
+        // Common case: client sent IDs that already match Anthropic's
+        // pattern — nothing to record, nothing to restore.
+        let mut req = empty_request();
+        req.messages = vec![assistant_message_with_blocks(vec![ContentBlock::tool_use(
+            "toolu_abc".to_string(),
+            "weather".to_string(),
+            serde_json::json!({}),
+        )])];
+
+        let mut id_map = OriginalToolIdMap::new();
+        sanitize_tool_use_ids(&mut req, &mut id_map);
+        assert!(id_map.is_empty());
+
+        let mut response = provider_response_with(vec![ContentBlock::tool_use(
+            "toolu_xyz".to_string(),
+            "weather".to_string(),
+            serde_json::json!({}),
+        )]);
+        restore_original_tool_ids(&mut response, &id_map);
+
+        match &response.content[0] {
+            ContentBlock::Known(KnownContentBlock::ToolUse { id, .. }) => {
+                assert_eq!(id, "toolu_xyz", "untracked IDs must pass through unchanged");
+            }
+            other => panic!("expected ToolUse, got {:?}", other),
+        }
+    }
+
+    #[test]
+    fn restore_leaves_unmapped_ids_untouched() {
+        // The map records a single rewrite; a different ID in the response
+        // (e.g. a fresh Anthropic-generated one) must not be touched.
+        let mut id_map = OriginalToolIdMap::new();
+        id_map.record("functions_Bash_0", "functions.Bash:0");
+
+        let mut response = provider_response_with(vec![
+            ContentBlock::tool_use(
+                "functions_Bash_0".to_string(),
+                "Bash".to_string(),
+                serde_json::json!({}),
+            ),
+            ContentBlock::tool_use(
+                "toolu_fresh".to_string(),
+                "weather".to_string(),
+                serde_json::json!({}),
+            ),
+        ]);
+
+        restore_original_tool_ids(&mut response, &id_map);
+        match &response.content[0] {
+            ContentBlock::Known(KnownContentBlock::ToolUse { id, .. }) => {
+                assert_eq!(id, "functions.Bash:0");
+            }
+            other => panic!("expected ToolUse, got {:?}", other),
+        }
+        match &response.content[1] {
+            ContentBlock::Known(KnownContentBlock::ToolUse { id, .. }) => {
+                assert_eq!(id, "toolu_fresh");
+            }
+            other => panic!("expected ToolUse, got {:?}", other),
+        }
+    }
+}
diff --git a/src/providers/openai/transform.rs b/src/providers/openai/transform.rs
index 0670654b..a882c16c 100644
--- a/src/providers/openai/transform.rs
+++ b/src/providers/openai/transform.rs
@@ -2,6 +2,60 @@ use super::types::*;
 use crate::models::{CanonicalRequest, MessageContent};
 use crate::providers::error::ProviderError;
 use crate::providers::{ContentBlock, KnownContentBlock, ProviderResponse, Usage};
+use serde::Serialize;
+use thiserror::Error;
+
+/// Errors raised while translating a [`CanonicalRequest`] into OpenAI wire format.
+///
+/// These map to user-visible 4xx responses since the offending data is
+/// always client-supplied (e.g. malformed tool-use input). The provider layer
+/// converts them to [`ProviderError::SerializationError`] for the existing
+/// error pipeline.
+#[derive(Debug, Error)]
+pub(crate) enum TransformError {
+    /// Failed to serialize a `tool_use` block's `input` field as JSON.
+    ///
+    /// OpenAI requires tool arguments as a JSON-encoded string; if the canonical
+    /// `Value` (or wrapped `Serialize` payload) cannot round-trip through
+    /// `serde_json::to_string`, surface the error rather than sending an empty
+    /// string upstream — empty arguments either parse-error in OpenAI or cause
+    /// the model to invoke the tool with no input, both of which were
+    /// previously silent.
+    #[error("failed to serialize tool_use input for tool '{tool_name}': {source}")]
+    ToolInputSerialization {
+        /// Name of the tool whose input failed to serialize.
+        tool_name: String,
+        /// Underlying serde_json error.
+        #[source]
+        source: serde_json::Error,
+    },
+}
+
+impl From<TransformError> for ProviderError {
+    fn from(err: TransformError) -> Self {
+        match err {
+            TransformError::ToolInputSerialization { source, .. } => {
+                ProviderError::SerializationError(source)
+            }
+        }
+    }
+}
+
+/// Serializes a tool-use input value as a JSON string, attaching the tool name on failure.
+///
+/// # Errors
+///
+/// Returns [`TransformError::ToolInputSerialization`] if the value cannot be
+/// encoded as JSON.
+pub(crate) fn serialize_tool_input<T: Serialize>(
+    input: &T,
+    tool_name: &str,
+) -> Result<String, TransformError> {
+    serde_json::to_string(input).map_err(|source| TransformError::ToolInputSerialization {
+        tool_name: tool_name.to_string(),
+        source,
+    })
+}
 
 /// Transform Anthropic request format to OpenAI Chat Completions format.
 ///
@@ -10,9 +64,16 @@ use crate::providers::{ContentBlock, KnownContentBlock, ProviderResponse, Usage}
 /// - `tool_result` blocks → separate `tool` role messages
 /// - `image` blocks → `image_url` content parts with data URI encoding
 /// - `thinking` blocks → dropped (OpenAI doesn't support this)
+/// - System role: hoisted to the top-level `system` message exactly once,
+///   even if the canonical `messages` array also contains a system entry.
+///
+/// # Errors
+///
+/// Returns [`TransformError::ToolInputSerialization`] if a `tool_use`
+/// block's `input` cannot be serialized as JSON.
 pub(crate) fn transform_request(
     request: &CanonicalRequest,
-) -> Result<OpenAIRequest, ProviderError> {
+) -> Result<OpenAIRequest, TransformError> {
     let mut openai_messages = Vec::new();
 
     // Add system message if present
@@ -27,8 +88,20 @@ pub(crate) fn transform_request(
         });
     }
 
-    // Transform messages
+    // Transform messages.
+    //
+    // NOTE: A canonical `system` was already hoisted to the top-level OpenAI
+    // `system` message above. Drop any residual system-role entries from the
+    // messages array to prevent duplicate system messages in the OpenAI
+    // payload (audit Bug #3 — clients may send `[user, system, assistant]`
+    // and grob previously emitted two `role:"system"` messages).
     for msg in &request.messages {
+        if msg.role == "system" {
+            tracing::debug!(
+                "Dropping system-role message from canonical messages array (already hoisted to top-level system)"
+            );
+            continue;
+        }
         match &msg.content {
             MessageContent::Text(text) => {
                 openai_messages.push(OpenAIMessage {
@@ -40,11 +113,22 @@ pub(crate) fn transform_request(
                 });
             }
             MessageContent::Blocks(blocks) => {
-                transform_block_message(&msg.role, blocks, &mut openai_messages);
+                transform_block_message(&msg.role, blocks, &mut openai_messages)?;
             }
         }
     }
 
+    // Invariant: at most one system message (the hoisted one at index 0)
+    // remains in the OpenAI payload.
+    debug_assert!(
+        openai_messages
+            .iter()
+            .filter(|m| m.role == "system")
+            .count()
+            <= 1,
+        "system role leaked into OpenAI messages array more than once"
+    );
+
     // Transform tools if present
     let tools = transform_tools(request);
 
@@ -89,9 +173,9 @@ fn transform_block_message(
     role: &str,
     blocks: &[ContentBlock],
     openai_messages: &mut Vec<OpenAIMessage>,
-) {
+) -> Result<(), TransformError> {
     let tool_results = extract_tool_results(blocks);
-    let tool_calls = extract_tool_calls(blocks);
+    let tool_calls = extract_tool_calls(blocks)?;
     let content_parts = extract_content_parts(blocks);
 
     // Add separate tool result messages FIRST (OpenAI requires this ordering)
@@ -131,6 +215,7 @@ fn transform_block_message(
             tool_call_id: None,
         });
     }
+    Ok(())
 }
 
 /// Extract tool_result blocks as (tool_use_id, content) pairs.
@@ -163,24 +248,29 @@ fn extract_tool_results(blocks: &[ContentBlock]) -> Vec<(String, String)> {
 }
 
 /// Filters Anthropic `tool_use` content blocks and reshapes them as OpenAI `tool_calls` entries with JSON-stringified arguments.
-fn extract_tool_calls(blocks: &[ContentBlock]) -> Vec<OpenAIToolCall> {
-    blocks
-        .iter()
-        .filter_map(|block| {
-            if let ContentBlock::Known(KnownContentBlock::ToolUse { id, name, input }) = block {
-                Some(OpenAIToolCall {
-                    id: id.clone(),
-                    r#type: "function".to_string(),
-                    function: OpenAIFunctionCall {
-                        name: name.clone(),
-                        arguments: serde_json::to_string(input).unwrap_or_default(),
-                    },
-                })
-            } else {
-                None
-            }
-        })
-        .collect()
+///
+/// # Errors
+///
+/// Returns [`TransformError::ToolInputSerialization`] if any tool's `input`
+/// fails JSON serialization. Previously substituted an empty string, which
+/// caused either an OpenAI parse error or a tool invocation with no
+/// arguments — both silent.
+fn extract_tool_calls(blocks: &[ContentBlock]) -> Result<Vec<OpenAIToolCall>, TransformError> {
+    let mut calls = Vec::new();
+    for block in blocks {
+        if let ContentBlock::Known(KnownContentBlock::ToolUse { id, name, input }) = block {
+            let arguments = serialize_tool_input(input, name)?;
+            calls.push(OpenAIToolCall {
+                id: id.clone(),
+                r#type: "function".to_string(),
+                function: OpenAIFunctionCall {
+                    name: name.clone(),
+                    arguments,
+                },
+            });
+        }
+    }
+    Ok(calls)
 }
 
 /// Extract text and image content parts (excluding tool blocks and thinking).
@@ -454,3 +544,181 @@ pub(crate) fn transform_to_responses_request(
         stream: true,
     })
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::models::{
+        CanonicalRequest, ContentBlock, KnownContentBlock, Message, MessageContent, SystemPrompt,
+    };
+    use serde::{Serialize, Serializer};
+
+    fn base_request() -> CanonicalRequest {
+        CanonicalRequest {
+            model: "gpt-4o".to_string(),
+            messages: Vec::new(),
+            max_tokens: 100,
+            thinking: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            stop_sequences: None,
+            stream: None,
+            metadata: None,
+            system: None,
+            tools: None,
+            tool_choice: None,
+            extensions: Default::default(),
+        }
+    }
+
+    #[test]
+    fn transform_strips_system_from_messages_after_hoisting() {
+        // Bug #3: client sends `[user, system, assistant]` to grob, which
+        // already hoists `request.system`; the original system role MUST be
+        // dropped from the messages array, otherwise OpenAI receives two
+        // `role:"system"` messages.
+        let mut req = base_request();
+        req.system = Some(SystemPrompt::Text("hoisted system".to_string()));
+        req.messages = vec![
+            Message {
+                role: "user".to_string(),
+                content: MessageContent::Text("hi".to_string()),
+            },
+            Message {
+                role: "system".to_string(),
+                content: MessageContent::Text("inline system that must be dropped".to_string()),
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: MessageContent::Text("hello".to_string()),
+            },
+        ];
+
+        let openai = transform_request(&req).expect("transform");
+
+        let system_count = openai
+            .messages
+            .iter()
+            .filter(|m| m.role == "system")
+            .count();
+        assert_eq!(
+            system_count,
+            1,
+            "expected exactly one system message after hoisting; got {} (messages: {:?})",
+            system_count,
+            openai.messages.iter().map(|m| &m.role).collect::<Vec<_>>()
+        );
+
+        // The remaining system message must be the hoisted one.
+        match &openai.messages[0].content {
+            Some(OpenAIContent::String(s)) => assert_eq!(s, "hoisted system"),
+            other => panic!("expected hoisted system text, got {:?}", other),
+        }
+
+        // Order of remaining roles preserved.
+        let roles: Vec<&str> = openai.messages.iter().map(|m| m.role.as_str()).collect();
+        assert_eq!(roles, vec!["system", "user", "assistant"]);
+    }
+
+    #[test]
+    fn transform_strips_system_when_no_hoisted_system_field() {
+        // If there's no `request.system` set, but a stray `role:"system"`
+        // message slipped into `messages`, we still drop it — the canonical
+        // wire format reserves `role:"system"` for the dedicated field.
+        let mut req = base_request();
+        req.messages = vec![
+            Message {
+                role: "system".to_string(),
+                content: MessageContent::Text("inline".to_string()),
+            },
+            Message {
+                role: "user".to_string(),
+                content: MessageContent::Text("hi".to_string()),
+            },
+        ];
+
+        let openai = transform_request(&req).expect("transform");
+        let system_count = openai
+            .messages
+            .iter()
+            .filter(|m| m.role == "system")
+            .count();
+        assert_eq!(system_count, 0);
+        assert_eq!(openai.messages.len(), 1);
+        assert_eq!(openai.messages[0].role, "user");
+    }
+
+    /// A `Serialize` payload that always errors. Mirrors `serde_json`'s
+    /// internal failure surface so we can exercise the error path even when
+    /// `serde_json::Value` itself is effectively infallible to encode.
+    struct AlwaysFail;
+
+    impl Serialize for AlwaysFail {
+        fn serialize<S: Serializer>(&self, _serializer: S) -> Result<S::Ok, S::Error> {
+            Err(serde::ser::Error::custom("synthetic serialization failure"))
+        }
+    }
+
+    #[test]
+    fn transform_returns_error_when_tool_input_unserializable() {
+        // Direct exercise of the helper used by extract_tool_calls. A
+        // `Serialize` payload that always fails proves error propagation
+        // surfaces the tool name and underlying serde_json error.
+        let result = serialize_tool_input(&AlwaysFail, "broken_tool");
+        let err = result.expect_err("expected serialization failure");
+        match err {
+            TransformError::ToolInputSerialization { tool_name, source } => {
+                assert_eq!(tool_name, "broken_tool");
+                assert!(
+                    source
+                        .to_string()
+                        .contains("synthetic serialization failure"),
+                    "source error did not bubble through: {}",
+                    source
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn transform_propagates_tool_input_error_to_provider_error() {
+        // `From<TransformError> for ProviderError` keeps the legacy callers
+        // (which return `ProviderError`) compatible while preserving the
+        // structured error category.
+        let err = TransformError::ToolInputSerialization {
+            tool_name: "broken_tool".to_string(),
+            source: serde_json::from_str::<serde_json::Value>("{").unwrap_err(),
+        };
+        let provider_err: ProviderError = err.into();
+        assert!(matches!(provider_err, ProviderError::SerializationError(_)));
+    }
+
+    #[test]
+    fn transform_succeeds_when_tool_input_well_formed() {
+        // Sanity check: typical tool_use blocks still translate cleanly.
+        let mut req = base_request();
+        req.messages = vec![Message {
+            role: "assistant".to_string(),
+            content: MessageContent::Blocks(vec![ContentBlock::Known(
+                KnownContentBlock::ToolUse {
+                    id: "call_1".to_string(),
+                    name: "weather".to_string(),
+                    input: serde_json::json!({"city": "Paris"}),
+                },
+            )]),
+        }];
+
+        let openai = transform_request(&req).expect("transform");
+        let assistant = openai
+            .messages
+            .iter()
+            .find(|m| m.role == "assistant")
+            .expect("assistant message");
+        let tool_calls = assistant.tool_calls.as_ref().expect("tool_calls present");
+        assert_eq!(tool_calls.len(), 1);
+        assert_eq!(tool_calls[0].id, "call_1");
+        assert_eq!(tool_calls[0].function.name, "weather");
+        assert_eq!(tool_calls[0].function.arguments, r#"{"city":"Paris"}"#);
+    }
+}