From f8201480b77da978be979b3c0045b8f24dc96245 Mon Sep 17 00:00:00 2001
From: jasonxue
Date: Wed, 28 Jan 2026 15:27:21 +0800
Subject: [PATCH 1/2] Fix tool message content format for DeepSeek API
 compatibility

DeepSeek API expects tool message content to be a string, not an array.
Changed from .blocks() to .text() for tool messages. Also ensures
structured tool outputs (JSON) are correctly serialized to JSON strings
instead of using debug descriptions.
---
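
Note: the "debug descriptions" issue is easy to reproduce in isolation. A
minimal sketch, assuming only Foundation; payload is a hypothetical
stand-in for a structured segment's underlying value, not a type from this
repository:

    import Foundation

    // A hypothetical structured tool output.
    let payload: [String: Any] = ["temperature": 22.5, "unit": "C"]

    // A debug description is Swift literal syntax, not JSON:
    let debugText = String(describing: payload)
    // ["unit": "C", "temperature": 22.5]

    // Serializing to an actual JSON string, as this patch intends:
    let data = try JSONSerialization.data(
        withJSONObject: payload,
        options: [.sortedKeys]
    )
    let jsonText = String(decoding: data, as: UTF8.self)
    // {"temperature":22.5,"unit":"C"}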
 .../Models/OpenAILanguageModel.swift | 46 ++++++++++++++++---
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
index bd3d8c1..f028f97 100644
--- a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
@@ -514,9 +514,19 @@ public struct OpenAILanguageModel: LanguageModel {
             for invocation in invocations {
                 let output = invocation.output
                 entries.append(.toolOutput(output))
-                let toolSegments: [Transcript.Segment] = output.segments
-                let blocks = convertSegmentsToOpenAIBlocks(toolSegments)
-                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .blocks(blocks)))
+                // Convert tool output segments to plain text for DeepSeek API compatibility
+                let toolContent = output.segments.map { segment -> String in
+                    switch segment {
+                    case .text(let textSegment): return textSegment.content
+                    case .structure(let structuredSegment):
+                        switch structuredSegment.content.kind {
+                        case .string(let text): return text
+                        default: return structuredSegment.content.jsonString
+                        }
+                    case .image: return ""
+                    }
+                }.joined()
+                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .text(toolContent)))
             }
             continue
         }
@@ -579,9 +589,19 @@ public struct OpenAILanguageModel: LanguageModel {
             for invocation in invocations {
                 let output = invocation.output
                 entries.append(.toolOutput(output))
-                let toolSegments: [Transcript.Segment] = output.segments
-                let blocks = convertSegmentsToOpenAIBlocks(toolSegments)
-                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .blocks(blocks)))
+                // Convert tool output segments to plain text for DeepSeek API compatibility
+                let toolContent = output.segments.map { segment -> String in
+                    switch segment {
+                    case .text(let textSegment): return textSegment.content
+                    case .structure(let structuredSegment):
+                        switch structuredSegment.content.kind {
+                        case .string(let text): return text
+                        default: return structuredSegment.content.jsonString
+                        }
+                    case .image: return ""
+                    }
+                }.joined()
+                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .text(toolContent)))
             }
             continue
         }
@@ -1165,10 +1185,22 @@ extension Transcript {
                 )
             )
         case .toolOutput(let toolOutput):
+            // Convert tool output segments to plain text for DeepSeek API compatibility
+            let toolContent = toolOutput.segments.map { segment -> String in
+                switch segment {
+                case .text(let textSegment): return textSegment.content
+                case .structure(let structuredSegment):
+                    switch structuredSegment.content.kind {
+                    case .string(let text): return text
+                    default: return structuredSegment.content.jsonString
+                    }
+                case .image: return ""
+                }
+            }.joined()
             messages.append(
                 .init(
                     role: .tool(id: toolOutput.id),
-                    content: .blocks(convertSegmentsToOpenAIBlocks(toolOutput.segments))
+                    content: .text(toolContent)
                 )
             )
         }

From 15dcdbc117ab22a8d8b115ea4c0d03de2f85a94f Mon Sep 17 00:00:00 2001
From: Mattt Zmuda
Date: Wed, 4 Feb 2026 01:57:21 -0800
Subject: [PATCH 2/2] Extract shared code into
 convertSegmentsToToolContentString helper

Drop image segments rather than emitting empty strings.
---
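
Note: the behavioral change is subtler than the refactor suggests. Patch 1
joined segments with no separator, so an empty string for an image segment
was invisible; the helper joins with "\n", where an empty element would
leave a stray blank line, hence dropping image segments via compactMap. A
reduced sketch (this Segment enum is an illustrative stand-in, not the
library's Transcript.Segment):

    // Stand-in for the two segment kinds that matter here.
    enum Segment {
        case text(String)
        case image
    }

    let segments: [Segment] = [.text("reading: 42"), .image, .text("done")]

    // map + "" keeps a placeholder for the image segment:
    let withPlaceholder = segments.map { segment -> String in
        switch segment {
        case .text(let value): return value
        case .image: return ""
        }
    }.joined(separator: "\n")
    // "reading: 42\n\ndone" (blank line in the middle)

    // compactMap drops the image segment entirely:
    let dropped = segments.compactMap { segment -> String? in
        switch segment {
        case .text(let value): return value
        case .image: return nil
        }
    }.joined(separator: "\n")
    // "reading: 42\ndone"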
 .../Models/OpenAILanguageModel.swift | 72 +++++++++----------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
index f028f97..6299ce0 100644
--- a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
@@ -514,19 +514,12 @@ public struct OpenAILanguageModel: LanguageModel {
             for invocation in invocations {
                 let output = invocation.output
                 entries.append(.toolOutput(output))
-                // Convert tool output segments to plain text for DeepSeek API compatibility
-                let toolContent = output.segments.map { segment -> String in
-                    switch segment {
-                    case .text(let textSegment): return textSegment.content
-                    case .structure(let structuredSegment):
-                        switch structuredSegment.content.kind {
-                        case .string(let text): return text
-                        default: return structuredSegment.content.jsonString
-                        }
-                    case .image: return ""
-                    }
-                }.joined()
-                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .text(toolContent)))
+                messages.append(
+                    OpenAIMessage(
+                        role: .tool(id: invocation.call.id),
+                        content: .text(convertSegmentsToToolContentString(output.segments))
+                    )
+                )
             }
             continue
         }
@@ -589,19 +582,12 @@ public struct OpenAILanguageModel: LanguageModel {
             for invocation in invocations {
                 let output = invocation.output
                 entries.append(.toolOutput(output))
-                // Convert tool output segments to plain text for DeepSeek API compatibility
-                let toolContent = output.segments.map { segment -> String in
-                    switch segment {
-                    case .text(let textSegment): return textSegment.content
-                    case .structure(let structuredSegment):
-                        switch structuredSegment.content.kind {
-                        case .string(let text): return text
-                        default: return structuredSegment.content.jsonString
-                        }
-                    case .image: return ""
-                    }
-                }.joined()
-                messages.append(OpenAIMessage(role: .tool(id: invocation.call.id), content: .text(toolContent)))
+                messages.append(
+                    OpenAIMessage(
+                        role: .tool(id: invocation.call.id),
+                        content: .text(convertSegmentsToToolContentString(output.segments))
+                    )
+                )
             }
             continue
         }
@@ -1185,22 +1171,10 @@ extension Transcript {
                 )
             )
         case .toolOutput(let toolOutput):
-            // Convert tool output segments to plain text for DeepSeek API compatibility
-            let toolContent = toolOutput.segments.map { segment -> String in
-                switch segment {
-                case .text(let textSegment): return textSegment.content
-                case .structure(let structuredSegment):
-                    switch structuredSegment.content.kind {
-                    case .string(let text): return text
-                    default: return structuredSegment.content.jsonString
-                    }
-                case .image: return ""
-                }
-            }.joined()
             messages.append(
                 .init(
                     role: .tool(id: toolOutput.id),
-                    content: .text(toolContent)
+                    content: .text(convertSegmentsToToolContentString(toolOutput.segments))
                 )
             )
         }
@@ -1340,6 +1314,26 @@ private func convertSegmentsToOpenAIBlocks(_ segments: [Transcript.Segment]) ->
     return blocks
 }
 
+/// Converts a list of transcript segments to a string representation suitable for tool message content.
+private func convertSegmentsToToolContentString(_ segments: [Transcript.Segment]) -> String {
+    // Tool message content must be string (or text-only parts) per API spec.
+    // String is used for broad provider compatibility.
+    // Image segments are intentionally omitted because tool outputs only support text.
+    segments.compactMap { segment in
+        switch segment {
+        case .text(let textSegment):
+            return textSegment.content
+        case .structure(let structuredSegment):
+            switch structuredSegment.content.kind {
+            case .string(let text): return text
+            default: return structuredSegment.content.jsonString
+            }
+        case .image:
+            return nil
+        }
+    }.joined(separator: "\n")
+}
+
 private struct OpenAITool: Hashable, Codable, Sendable {
     let type: String
     let function: OpenAIFunction
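
Note: the wire format both patches converge on can be sketched without this
library. ToolMessage and the call ID below are illustrative stand-ins, not
the repository's OpenAIMessage type:

    import Foundation

    // Pared-down tool message with plain string content, the shape that
    // DeepSeek requires and that other Chat Completions providers accept.
    struct ToolMessage: Codable {
        var role: String = "tool"
        let toolCallId: String
        let content: String

        enum CodingKeys: String, CodingKey {
            case role, content
            case toolCallId = "tool_call_id"
        }
    }

    let message = ToolMessage(
        toolCallId: "call_abc",
        content: #"{"temperature":22.5}"#
    )
    let json = String(decoding: try JSONEncoder().encode(message), as: UTF8.self)
    // {"role":"tool","tool_call_id":"call_abc","content":"{\"temperature\":22.5}"}
    // (key order may vary)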