Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions pkg/model/provider/openai/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,20 +480,31 @@ func convertMessagesToResponseInput(messages []chat.Message) []responses.Respons
},
}
} else {
// Assistant message with tool calls - convert to response input item with function calls
// Preserve assistant text content as a separate message so the
// model retains conversational context across tool-call rounds.
if strings.TrimSpace(msg.Content) != "" {
input = append(input, responses.ResponseInputItemUnionParam{
OfMessage: &responses.EasyInputMessageParam{
Role: responses.EasyInputMessageRoleAssistant,
Content: responses.EasyInputMessageContentUnionParam{
OfString: param.NewOpt(msg.Content),
},
},
})
}

for _, toolCall := range msg.ToolCalls {
if toolCall.Type == "function" {
funcCallItem := responses.ResponseInputItemUnionParam{
input = append(input, responses.ResponseInputItemUnionParam{
OfFunctionCall: &responses.ResponseFunctionToolCallParam{
CallID: toolCall.ID,
Name: toolCall.Function.Name,
Arguments: toolCall.Function.Arguments,
},
}
input = append(input, funcCallItem)
})
}
}
continue // Don't add the assistant message itself
continue
}

case chat.MessageRoleSystem:
Expand Down
157 changes: 157 additions & 0 deletions pkg/model/provider/openai/convert_messages_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package openai

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/docker/cagent/pkg/chat"
"github.com/docker/cagent/pkg/tools"
)

func TestConvertMessagesToResponseInput_AssistantTextWithToolCalls(t *testing.T) {
	// An assistant message that carries both text AND tool calls must have
	// its text preserved as a standalone assistant item, emitted before the
	// function-call items. Dropping the text loses conversational context
	// across tool-call rounds and can make the model restart its approach.
	msgs := []chat.Message{
		{Role: chat.MessageRoleUser, Content: "Do something"},
		{
			Role:    chat.MessageRoleAssistant,
			Content: "Let me check that for you.",
			ToolCalls: []tools.ToolCall{
				{
					ID:   "call_1",
					Type: "function",
					Function: tools.FunctionCall{
						Name:      "read_file",
						Arguments: `{"path":"foo.go"}`,
					},
				},
			},
		},
		{Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "file contents here"},
	}

	items := convertMessagesToResponseInput(msgs)

	require.Len(t, items, 4, "should have user + assistant text + function_call + function_call_output")

	// The user message comes first.
	assert.NotNil(t, items[0].OfMessage)
	assert.Equal(t, "Do something", items[0].OfMessage.Content.OfString.Value)

	// The assistant's text survives as its own message item (not dropped).
	require.NotNil(t, items[1].OfMessage, "assistant text should be emitted as a separate message")
	assert.Equal(t, "Let me check that for you.", items[1].OfMessage.Content.OfString.Value)

	// Then the function call itself.
	require.NotNil(t, items[2].OfFunctionCall)
	assert.Equal(t, "call_1", items[2].OfFunctionCall.CallID)
	assert.Equal(t, "read_file", items[2].OfFunctionCall.Name)

	// Finally the tool's output, correlated by call ID.
	require.NotNil(t, items[3].OfFunctionCallOutput)
	assert.Equal(t, "call_1", items[3].OfFunctionCallOutput.CallID)
}

func TestConvertMessagesToResponseInput_AssistantToolCallsOnly(t *testing.T) {
	// An assistant message with tool calls but no text must not produce a
	// spurious empty assistant message item.
	msgs := []chat.Message{
		{Role: chat.MessageRoleUser, Content: "Do something"},
		{
			Role: chat.MessageRoleAssistant,
			ToolCalls: []tools.ToolCall{
				{
					ID:   "call_1",
					Type: "function",
					Function: tools.FunctionCall{
						Name:      "read_file",
						Arguments: `{"path":"foo.go"}`,
					},
				},
			},
		},
		{Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "file contents"},
	}

	items := convertMessagesToResponseInput(msgs)

	require.Len(t, items, 3, "should have user + function_call + function_call_output (no extra assistant message)")

	// user message, then the call, then its output — nothing in between.
	assert.NotNil(t, items[0].OfMessage)
	assert.NotNil(t, items[1].OfFunctionCall)
	assert.NotNil(t, items[2].OfFunctionCallOutput)
}

func TestConvertMessagesToResponseInput_MultipleToolCalls(t *testing.T) {
	// A single assistant message may fan out into several tool calls; every
	// call must be emitted and the assistant's text kept as its own item.
	msgs := []chat.Message{
		{Role: chat.MessageRoleUser, Content: "Check these files"},
		{
			Role:    chat.MessageRoleAssistant,
			Content: "I'll read both files.",
			ToolCalls: []tools.ToolCall{
				{ID: "call_1", Type: "function", Function: tools.FunctionCall{Name: "read_file", Arguments: `{"path":"a.go"}`}},
				{ID: "call_2", Type: "function", Function: tools.FunctionCall{Name: "read_file", Arguments: `{"path":"b.go"}`}},
			},
		},
		{Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "contents of a"},
		{Role: chat.MessageRoleTool, ToolCallID: "call_2", Content: "contents of b"},
	}

	items := convertMessagesToResponseInput(msgs)

	// user + assistant text + 2 function_calls + 2 function_call_outputs = 6
	require.Len(t, items, 6)

	// user, then the preserved assistant text...
	assert.NotNil(t, items[0].OfMessage)
	assert.NotNil(t, items[1].OfMessage)
	assert.Equal(t, "I'll read both files.", items[1].OfMessage.Content.OfString.Value)
	// ...then both calls, then both outputs, in order.
	assert.NotNil(t, items[2].OfFunctionCall)
	assert.NotNil(t, items[3].OfFunctionCall)
	assert.NotNil(t, items[4].OfFunctionCallOutput)
	assert.NotNil(t, items[5].OfFunctionCallOutput)
}

func TestConvertMessagesToResponseInput_WhitespaceOnlyAssistantText(t *testing.T) {
	// Assistant content that is only whitespace must be treated as empty:
	// no separate assistant message item is emitted for it.
	msgs := []chat.Message{
		{Role: chat.MessageRoleUser, Content: "Do something"},
		{
			Role:    chat.MessageRoleAssistant,
			Content: " \n\t ",
			ToolCalls: []tools.ToolCall{
				{ID: "call_1", Type: "function", Function: tools.FunctionCall{Name: "test", Arguments: "{}"}},
			},
		},
		{Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "done"},
	}

	items := convertMessagesToResponseInput(msgs)

	require.Len(t, items, 3, "whitespace-only content should not produce extra message")
	assert.NotNil(t, items[0].OfMessage)
	assert.NotNil(t, items[1].OfFunctionCall)
	assert.NotNil(t, items[2].OfFunctionCallOutput)
}

func TestConvertMessagesToResponseInput_BasicFlow(t *testing.T) {
	// A plain conversation (no tool calls) converts one-to-one into items.
	msgs := []chat.Message{
		{Role: chat.MessageRoleSystem, Content: "You are helpful"},
		{Role: chat.MessageRoleUser, Content: "Hello"},
		{Role: chat.MessageRoleAssistant, Content: "Hi there!"},
		{Role: chat.MessageRoleUser, Content: "Bye"},
	}

	items := convertMessagesToResponseInput(msgs)

	require.Len(t, items, 4)
	assert.NotNil(t, items[0].OfInputMessage) // system uses OfInputMessage
	assert.NotNil(t, items[1].OfMessage)      // user
	assert.NotNil(t, items[2].OfMessage)      // assistant (no tool calls)
	assert.NotNil(t, items[3].OfMessage)      // user
}
2 changes: 1 addition & 1 deletion pkg/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c

if m != nil && r.sessionCompaction {
contextLength := sess.InputTokens + sess.OutputTokens
if contextLength > int64(float64(contextLimit)*0.9) {
if contextLength > int64(float64(contextLimit)*0.8) {
r.Summarize(ctx, sess, "", events)
}
}
Expand Down
43 changes: 0 additions & 43 deletions pkg/session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,6 @@ import (
"github.com/docker/cagent/pkg/tools"
)

const (
// MaxToolCallTokens is the maximum number of tokens to keep from tool call
// arguments and results. Older tool calls beyond this budget will have their
// content replaced with a placeholder. Tokens are approximated as len/4.
MaxToolCallTokens = 40000

// toolContentPlaceholder is the text used to replace truncated tool content
toolContentPlaceholder = "[content truncated]"
)

// Item represents either a message or a sub-session
type Item struct {
// Message holds a regular conversation message
Expand Down Expand Up @@ -677,8 +667,6 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
messages = trimMessages(messages, maxItems)
}

messages = truncateOldToolContent(messages, MaxToolCallTokens)

systemCount := 0
conversationCount := 0
for i := range messages {
Expand Down Expand Up @@ -757,34 +745,3 @@ func trimMessages(messages []chat.Message, maxItems int) []chat.Message {

return result
}

// truncateOldToolContent replaces tool results with placeholders for older
// messages that exceed the token budget. It processes messages from newest to
// oldest, keeping recent tool content intact while truncating older content
// once the budget is exhausted.
func truncateOldToolContent(messages []chat.Message, maxTokens int) []chat.Message {
if len(messages) == 0 || maxTokens <= 0 {
return messages
}

result := make([]chat.Message, len(messages))
copy(result, messages)

tokenBudget := maxTokens

for i := len(result) - 1; i >= 0; i-- {
msg := &result[i]

if msg.Role == chat.MessageRoleTool {
tokens := len(msg.Content) / 4
if tokenBudget >= tokens {
tokenBudget -= tokens
} else {
msg.Content = toolContentPlaceholder
tokenBudget = 0
}
}
}

return result
}
Loading