Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pkg/evaluation/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,12 @@ func buildTranscript(events []map[string]any) string {
fmt.Fprintf(&transcript, "[Agent %s calls tool %q with arguments: %s]\n\n", cmp.Or(currentAgent, "unknown"), name, args)

case "tool_call_response":
name, _ := getToolCallInfo(event)
// The ToolCallResponseEvent has tool_definition at the top level, not
// nested under "tool_call".
var name string
if td, ok := event["tool_definition"].(map[string]any); ok {
name, _ = td["name"].(string)
}
response, _ := event["response"].(string)
if len(response) > 500 {
response = response[:500] + "...(truncated)"
Expand Down
22 changes: 10 additions & 12 deletions pkg/evaluation/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -796,12 +796,11 @@ func TestBuildTranscript(t *testing.T) {
},
},
{
"type": "tool_call_response",
"response": "file contents here",
"tool_call": map[string]any{
"function": map[string]any{
"name": "read_file",
},
"type": "tool_call_response",
"response": "file contents here",
"tool_call_id": "call_123",
"tool_definition": map[string]any{
"name": "read_file",
},
},
},
Expand All @@ -814,12 +813,11 @@ func TestBuildTranscript(t *testing.T) {
name: "long tool response truncated",
events: []map[string]any{
{
"type": "tool_call_response",
"response": strings.Repeat("x", 600),
"tool_call": map[string]any{
"function": map[string]any{
"name": "shell",
},
"type": "tool_call_response",
"response": strings.Repeat("x", 600),
"tool_call_id": "call_789",
"tool_definition": map[string]any{
"name": "shell",
},
},
},
Expand Down
25 changes: 12 additions & 13 deletions pkg/evaluation/save.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,21 +192,20 @@ func SessionFromEvents(events []map[string]any, title string, questions []string
// Flush any pending assistant message before adding tool response
flushAssistantMessage()

// Add tool response message
if tc, ok := event["tool_call"].(map[string]any); ok {
toolCallID, _ := tc["id"].(string)
response, _ := event["response"].(string)
// The ToolCallResponseEvent serializes tool_call_id as a top-level string field,
// not nested under a "tool_call" map.
toolCallID, _ := event["tool_call_id"].(string)
response, _ := event["response"].(string)

msg := &session.Message{
Message: chat.Message{
Role: chat.MessageRoleTool,
Content: response,
ToolCallID: toolCallID,
CreatedAt: eventTimestamp,
},
}
sess.AddMessage(msg)
msg := &session.Message{
Message: chat.Message{
Role: chat.MessageRoleTool,
Content: response,
ToolCallID: toolCallID,
CreatedAt: eventTimestamp,
},
}
sess.AddMessage(msg)

case "token_usage":
// Update session token usage
Expand Down
16 changes: 6 additions & 10 deletions pkg/evaluation/save_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,11 +302,9 @@ func TestSessionFromEvents(t *testing.T) {
},
},
{
"type": "tool_call_response",
"tool_call": map[string]any{
"id": "call_123",
},
"response": "file content",
"type": "tool_call_response",
"tool_call_id": "call_123",
"response": "file content",
},
{"type": "agent_choice", "content": "Done!"},
{"type": "stream_stopped"},
Expand Down Expand Up @@ -452,11 +450,9 @@ func TestSessionFromEventsWithToolDefinitions(t *testing.T) {
},
},
{
"type": "tool_call_response",
"tool_call": map[string]any{
"id": "call_123",
},
"response": "file content",
"type": "tool_call_response",
"tool_call_id": "call_123",
"response": "file content",
},
{"type": "stream_stopped"},
}
Expand Down
Loading