From 62cc0c73fe5f065df6ad42750f562d5537c6d540 Mon Sep 17 00:00:00 2001 From: Christopher Petito Date: Mon, 23 Feb 2026 23:53:09 +0100 Subject: [PATCH] context truncation/compaction improvements - Removes truncateOldToolContent and MaxToolCallTokens from session to avoid busting cache unnecessarily and potentially confusing models - Preserves assistant text as a separate message item before function_call items in convertMessagesToResponseInput (responses API) - Lowers default context limit before compaction to 80% of model's context length, anything after 50% usually sees pregressively bigger drops in output quality Signed-off-by: Christopher Petito --- pkg/model/provider/openai/client.go | 21 +- .../provider/openai/convert_messages_test.go | 157 +++++++++++++ pkg/runtime/runtime.go | 2 +- pkg/session/session.go | 43 ---- pkg/session/truncate_test.go | 214 ++++++++---------- 5 files changed, 274 insertions(+), 163 deletions(-) create mode 100644 pkg/model/provider/openai/convert_messages_test.go diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index f13e7a2a9..60be4a38e 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -480,20 +480,31 @@ func convertMessagesToResponseInput(messages []chat.Message) []responses.Respons }, } } else { - // Assistant message with tool calls - convert to response input item with function calls + // Preserve assistant text content as a separate message so the + // model retains conversational context across tool-call rounds. + if strings.TrimSpace(msg.Content) != "" { + input = append(input, responses.ResponseInputItemUnionParam{ + OfMessage: &responses.EasyInputMessageParam{ + Role: responses.EasyInputMessageRoleAssistant, + Content: responses.EasyInputMessageContentUnionParam{ + OfString: param.NewOpt(msg.Content), + }, + }, + }) + } + for _, toolCall := range msg.ToolCalls { if toolCall.Type == "function" { - funcCallItem := responses.ResponseInputItemUnionParam{ + input = append(input, responses.ResponseInputItemUnionParam{ OfFunctionCall: &responses.ResponseFunctionToolCallParam{ CallID: toolCall.ID, Name: toolCall.Function.Name, Arguments: toolCall.Function.Arguments, }, - } - input = append(input, funcCallItem) + }) } } - continue // Don't add the assistant message itself + continue } case chat.MessageRoleSystem: diff --git a/pkg/model/provider/openai/convert_messages_test.go b/pkg/model/provider/openai/convert_messages_test.go new file mode 100644 index 000000000..7608030f9 --- /dev/null +++ b/pkg/model/provider/openai/convert_messages_test.go @@ -0,0 +1,157 @@ +package openai + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/cagent/pkg/chat" + "github.com/docker/cagent/pkg/tools" +) + +func TestConvertMessagesToResponseInput_AssistantTextWithToolCalls(t *testing.T) { + // When an assistant message has both text content AND tool calls, + // the text content must be preserved as a separate assistant message + // item before the function call items. Dropping it causes the model + // to lose conversational context and potentially re-start its approach. + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Do something"}, + { + Role: chat.MessageRoleAssistant, + Content: "Let me check that for you.", + ToolCalls: []tools.ToolCall{ + { + ID: "call_1", + Type: "function", + Function: tools.FunctionCall{ + Name: "read_file", + Arguments: `{"path":"foo.go"}`, + }, + }, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "file contents here"}, + } + + input := convertMessagesToResponseInput(messages) + + require.Len(t, input, 4, "should have user + assistant text + function_call + function_call_output") + + // Item 0: user message + assert.NotNil(t, input[0].OfMessage) + assert.Equal(t, "Do something", input[0].OfMessage.Content.OfString.Value) + + // Item 1: assistant text content (preserved, not dropped) + require.NotNil(t, input[1].OfMessage, "assistant text should be emitted as a separate message") + assert.Equal(t, "Let me check that for you.", input[1].OfMessage.Content.OfString.Value) + + // Item 2: function call + require.NotNil(t, input[2].OfFunctionCall) + assert.Equal(t, "call_1", input[2].OfFunctionCall.CallID) + assert.Equal(t, "read_file", input[2].OfFunctionCall.Name) + + // Item 3: function call output + require.NotNil(t, input[3].OfFunctionCallOutput) + assert.Equal(t, "call_1", input[3].OfFunctionCallOutput.CallID) +} + +func TestConvertMessagesToResponseInput_AssistantToolCallsOnly(t *testing.T) { + // When assistant has tool calls but no text, no extra message should be emitted. + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Do something"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + { + ID: "call_1", + Type: "function", + Function: tools.FunctionCall{ + Name: "read_file", + Arguments: `{"path":"foo.go"}`, + }, + }, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "file contents"}, + } + + input := convertMessagesToResponseInput(messages) + + require.Len(t, input, 3, "should have user + function_call + function_call_output (no extra assistant message)") + + assert.NotNil(t, input[0].OfMessage) + assert.NotNil(t, input[1].OfFunctionCall) + assert.NotNil(t, input[2].OfFunctionCallOutput) +} + +func TestConvertMessagesToResponseInput_MultipleToolCalls(t *testing.T) { + // Verify that multiple tool calls from a single assistant message + // all get emitted, and text content is preserved. + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Check these files"}, + { + Role: chat.MessageRoleAssistant, + Content: "I'll read both files.", + ToolCalls: []tools.ToolCall{ + {ID: "call_1", Type: "function", Function: tools.FunctionCall{Name: "read_file", Arguments: `{"path":"a.go"}`}}, + {ID: "call_2", Type: "function", Function: tools.FunctionCall{Name: "read_file", Arguments: `{"path":"b.go"}`}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "contents of a"}, + {Role: chat.MessageRoleTool, ToolCallID: "call_2", Content: "contents of b"}, + } + + input := convertMessagesToResponseInput(messages) + + // user + assistant text + 2 function_calls + 2 function_call_outputs = 6 + require.Len(t, input, 6) + + assert.NotNil(t, input[0].OfMessage) // user + assert.NotNil(t, input[1].OfMessage) // assistant text + assert.Equal(t, "I'll read both files.", input[1].OfMessage.Content.OfString.Value) // assistant text preserved + assert.NotNil(t, input[2].OfFunctionCall) // call_1 + assert.NotNil(t, input[3].OfFunctionCall) // call_2 + assert.NotNil(t, input[4].OfFunctionCallOutput) // result 1 + assert.NotNil(t, input[5].OfFunctionCallOutput) // result 2 +} + +func TestConvertMessagesToResponseInput_WhitespaceOnlyAssistantText(t *testing.T) { + // Whitespace-only text content should NOT produce an extra assistant message. + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Do something"}, + { + Role: chat.MessageRoleAssistant, + Content: " \n\t ", + ToolCalls: []tools.ToolCall{ + {ID: "call_1", Type: "function", Function: tools.FunctionCall{Name: "test", Arguments: "{}"}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "call_1", Content: "done"}, + } + + input := convertMessagesToResponseInput(messages) + + require.Len(t, input, 3, "whitespace-only content should not produce extra message") + assert.NotNil(t, input[0].OfMessage) + assert.NotNil(t, input[1].OfFunctionCall) + assert.NotNil(t, input[2].OfFunctionCallOutput) +} + +func TestConvertMessagesToResponseInput_BasicFlow(t *testing.T) { + // Verify basic conversation flow converts correctly. + messages := []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful"}, + {Role: chat.MessageRoleUser, Content: "Hello"}, + {Role: chat.MessageRoleAssistant, Content: "Hi there!"}, + {Role: chat.MessageRoleUser, Content: "Bye"}, + } + + input := convertMessagesToResponseInput(messages) + + require.Len(t, input, 4) + assert.NotNil(t, input[0].OfInputMessage) // system uses OfInputMessage + assert.NotNil(t, input[1].OfMessage) // user + assert.NotNil(t, input[2].OfMessage) // assistant (no tool calls) + assert.NotNil(t, input[3].OfMessage) // user +} diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index d4a337ede..b30d91180 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -1022,7 +1022,7 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c if m != nil && r.sessionCompaction { contextLength := sess.InputTokens + sess.OutputTokens - if contextLength > int64(float64(contextLimit)*0.9) { + if contextLength > int64(float64(contextLimit)*0.8) { r.Summarize(ctx, sess, "", events) } } diff --git a/pkg/session/session.go b/pkg/session/session.go index 0dc00ac38..8e8f7d598 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -14,16 +14,6 @@ import ( "github.com/docker/cagent/pkg/tools" ) -const ( - // MaxToolCallTokens is the maximum number of tokens to keep from tool call - // arguments and results. Older tool calls beyond this budget will have their - // content replaced with a placeholder. Tokens are approximated as len/4. - MaxToolCallTokens = 40000 - - // toolContentPlaceholder is the text used to replace truncated tool content - toolContentPlaceholder = "[content truncated]" -) - // Item represents either a message or a sub-session type Item struct { // Message holds a regular conversation message @@ -677,8 +667,6 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message { messages = trimMessages(messages, maxItems) } - messages = truncateOldToolContent(messages, MaxToolCallTokens) - systemCount := 0 conversationCount := 0 for i := range messages { @@ -757,34 +745,3 @@ func trimMessages(messages []chat.Message, maxItems int) []chat.Message { return result } - -// truncateOldToolContent replaces tool results with placeholders for older -// messages that exceed the token budget. It processes messages from newest to -// oldest, keeping recent tool content intact while truncating older content -// once the budget is exhausted. -func truncateOldToolContent(messages []chat.Message, maxTokens int) []chat.Message { - if len(messages) == 0 || maxTokens <= 0 { - return messages - } - - result := make([]chat.Message, len(messages)) - copy(result, messages) - - tokenBudget := maxTokens - - for i := len(result) - 1; i >= 0; i-- { - msg := &result[i] - - if msg.Role == chat.MessageRoleTool { - tokens := len(msg.Content) / 4 - if tokenBudget >= tokens { - tokenBudget -= tokens - } else { - msg.Content = toolContentPlaceholder - tokenBudget = 0 - } - } - } - - return result -} diff --git a/pkg/session/truncate_test.go b/pkg/session/truncate_test.go index 1f6a20e21..6b1734594 100644 --- a/pkg/session/truncate_test.go +++ b/pkg/session/truncate_test.go @@ -7,135 +7,121 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/docker/cagent/pkg/agent" "github.com/docker/cagent/pkg/chat" "github.com/docker/cagent/pkg/tools" ) -func TestTruncateOldToolContent(t *testing.T) { - t.Run("keeps recent tool content within budget", func(t *testing.T) { - messages := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "hello"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "1", Function: tools.FunctionCall{Name: "test", Arguments: `{"key":"value"}`}}, - }, +func TestGetMessages_PreservesAllToolContent(t *testing.T) { + // Tool results and arguments are never modified by GetMessages. + // Session compaction handles context limits instead. + testAgent := agent.New("test-agent", "test instruction") + + largeArgs := `{"path":"big.go","content":"` + strings.Repeat("x", 50000) + `"}` + largeResult := strings.Repeat("y", 50000) + + s := New() + s.AddMessage(UserMessage("do something")) + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "call_1", Type: "function", Function: tools.FunctionCall{ + Name: "write_file", Arguments: largeArgs, + }}, }, - {Role: chat.MessageRoleTool, ToolCallID: "1", Content: "result"}, - } - - result := truncateOldToolContent(messages, 1000) - - assert.JSONEq(t, `{"key":"value"}`, result[1].ToolCalls[0].Function.Arguments) - assert.Equal(t, "result", result[2].Content) + }, }) - - t.Run("truncates oldest tool content when budget exceeded", func(t *testing.T) { - oldArgs := strings.Repeat("a", 400) // 100 tokens - oldResult := strings.Repeat("b", 400) // 100 tokens - newArgs := strings.Repeat("c", 200) // 50 tokens - newResult := strings.Repeat("d", 200) // 50 tokens - - messages := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "first"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "old", Function: tools.FunctionCall{Name: "test", Arguments: oldArgs}}, - }, - }, - {Role: chat.MessageRoleTool, ToolCallID: "old", Content: oldResult}, - {Role: chat.MessageRoleUser, Content: "second"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "new", Function: tools.FunctionCall{Name: "test", Arguments: newArgs}}, - }, + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleTool, ToolCallID: "call_1", + Content: "file written successfully", + }, + }) + s.AddMessage(UserMessage("now read it")) + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "call_2", Type: "function", Function: tools.FunctionCall{ + Name: "read_file", Arguments: `{"path":"big.go"}`, + }}, }, - {Role: chat.MessageRoleTool, ToolCallID: "new", Content: newResult}, - } - - // Budget of 60 tokens: new result (50 tokens) fits, old result gets truncated - result := truncateOldToolContent(messages, 60) - - // New result should be preserved, old result should be truncated - assert.Equal(t, newResult, result[5].Content) - assert.Equal(t, toolContentPlaceholder, result[2].Content) + }, }) - - t.Run("does not modify non-tool messages", func(t *testing.T) { - messages := []chat.Message{ - {Role: chat.MessageRoleUser, Content: strings.Repeat("x", 1000)}, - {Role: chat.MessageRoleAssistant, Content: strings.Repeat("y", 1000)}, - {Role: chat.MessageRoleSystem, Content: strings.Repeat("z", 1000)}, - } - - result := truncateOldToolContent(messages, 10) - - assert.Equal(t, messages[0].Content, result[0].Content) - assert.Equal(t, messages[1].Content, result[1].Content) - assert.Equal(t, messages[2].Content, result[2].Content) + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleTool, ToolCallID: "call_2", + Content: largeResult, + }, }) - t.Run("returns original messages when maxTokens is zero", func(t *testing.T) { - messages := []chat.Message{ - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "1", Function: tools.FunctionCall{Name: "test", Arguments: "args"}}, - }, - }, - {Role: chat.MessageRoleTool, ToolCallID: "1", Content: "result"}, - } - - result := truncateOldToolContent(messages, 0) - - assert.Equal(t, messages, result) - }) + messages := s.GetMessages(testAgent) - t.Run("returns original messages when maxTokens is negative", func(t *testing.T) { - messages := []chat.Message{ - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "1", Function: tools.FunctionCall{Name: "test", Arguments: "args"}}, - }, - }, - {Role: chat.MessageRoleTool, ToolCallID: "1", Content: "result"}, + // Verify tool results and arguments survive GetMessages verbatim. + var foundCall1Args, foundCall2Args string + var foundResult1, foundResult2 string + for _, msg := range messages { + if msg.Role == chat.MessageRoleTool && msg.ToolCallID == "call_1" { + foundResult1 = msg.Content } - - result := truncateOldToolContent(messages, -10) - - assert.Equal(t, messages, result) - }) - - t.Run("does not modify original slice", func(t *testing.T) { - originalContent := strings.Repeat("y", 400) - messages := []chat.Message{ - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "1", Function: tools.FunctionCall{Name: "test", Arguments: `{"key":"value"}`}}, - }, - }, - {Role: chat.MessageRoleTool, ToolCallID: "1", Content: originalContent}, + if msg.Role == chat.MessageRoleTool && msg.ToolCallID == "call_2" { + foundResult2 = msg.Content } + for _, tc := range msg.ToolCalls { + switch tc.ID { + case "call_1": + foundCall1Args = tc.Function.Arguments + case "call_2": + foundCall2Args = tc.Function.Arguments + } + } + } + assert.Equal(t, "file written successfully", foundResult1) + assert.Equal(t, largeResult, foundResult2) + assert.Equal(t, largeArgs, foundCall1Args) + assert.JSONEq(t, `{"path":"big.go"}`, foundCall2Args) +} - result := truncateOldToolContent(messages, 10) - - // Result should have truncated tool content - assert.Equal(t, toolContentPlaceholder, result[1].Content) - - // Original should be unchanged - assert.Equal(t, originalContent, messages[1].Content) +func TestGetMessages_PrefixStability(t *testing.T) { + // Calling GetMessages multiple times must produce identical output. + // This is critical for prompt caching: if the prefix changes between + // calls, every subsequent API request is a cache miss. + testAgent := agent.New("test-agent", "test instruction") + + s := New() + s.AddMessage(UserMessage("hello")) + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "call_1", Type: "function", Function: tools.FunctionCall{ + Name: "read_file", Arguments: `{"path":"foo.go"}`, + }}, + }, + }, + }) + s.AddMessage(&Message{ + AgentName: "test-agent", + Message: chat.Message{ + Role: chat.MessageRoleTool, ToolCallID: "call_1", + Content: "contents of foo.go", + }, }) - t.Run("handles empty messages slice", func(t *testing.T) { - result := truncateOldToolContent(nil, 1000) - assert.Nil(t, result) + msgs1 := s.GetMessages(testAgent) + msgs2 := s.GetMessages(testAgent) - result = truncateOldToolContent([]chat.Message{}, 1000) - require.NotNil(t, result) - assert.Empty(t, result) - }) + require.Len(t, msgs2, len(msgs1)) + for i := range msgs1 { + assert.Equal(t, msgs1[i].Content, msgs2[i].Content, + "message %d content should be identical between calls", i) + assert.Equal(t, msgs1[i].Role, msgs2[i].Role, + "message %d role should be identical between calls", i) + } }