diff --git a/.agents/editor/best-of-n/editor-implementor.ts b/.agents/editor/best-of-n/editor-implementor.ts
index f2f225bbdc..c954f5b52c 100644
--- a/.agents/editor/best-of-n/editor-implementor.ts
+++ b/.agents/editor/best-of-n/editor-implementor.ts
@@ -22,25 +22,24 @@ export const createBestOfNImplementor = (options: {
: 'openai/gpt-5.1',
displayName: 'Implementation Generator',
spawnerPrompt:
- 'Generates a complete implementation plan with all code changes',
+ 'Generates a complete implementation using propose_* tools that draft changes without applying them',
includeMessageHistory: true,
inheritParentSystemPrompt: true,
- toolNames: [],
+ toolNames: ['propose_write_file', 'propose_str_replace'],
spawnableAgents: [],
inputSchema: {},
- outputMode: 'last_message',
+ outputMode: 'structured_output',
instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request.
-Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
+Your task is to write out ALL the code changes needed to complete the user's request.
-Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, spawn agents, or set output. Do not call any of these tools!
-
-Write out what changes you would make using the tool call format below. Use this exact format for each file change:
+IMPORTANT: Use propose_str_replace and propose_write_file tools to make your edits. These tools draft changes without actually applying them - they will be reviewed first. DO NOT use any other tools. Do not spawn any agents, read files, or set output.
+You can make multiple tool calls across multiple steps to complete the implementation. Only the file changes will be passed on, so you can say whatever you want to help you think. Do not write any final summary as that would be a waste of tokens because no one is reading it.
{
"cb_tool_name": "str_replace",
@@ -116,15 +115,64 @@ More style notes:
- Optional arguments are code smell and worse than required arguments.
- New components often should be added to a new file, not added to an existing file.
-Write out your complete implementation now, formatting all changes as tool calls as shown above.`,
-
- handleSteps: function* () {
- yield 'STEP'
+Write out your complete implementation now. Do not write any final summary.`,
+
+ handleSteps: function* ({ agentState: initialAgentState }) {
+ const initialMessageHistoryLength =
+ initialAgentState.messageHistory.length
+
+ const { agentState } = yield 'STEP_ALL'
+
+ const postMessages = agentState.messageHistory.slice(
+ initialMessageHistoryLength,
+ )
+
+ // Extract tool calls from assistant messages
+ const toolCalls: { toolName: string; input: any }[] = []
+ for (const message of postMessages) {
+ if (message.role !== 'assistant' || !Array.isArray(message.content))
+ continue
+ for (const part of message.content) {
+ if (part.type === 'tool-call') {
+ toolCalls.push({
+ toolName: part.toolName,
+ input: part.input ?? (part as any).args ?? {},
+ })
+ }
+ }
+ }
+
+ // Extract tool results (unified diffs) from tool messages
+ const toolResults: any[] = []
+ for (const message of postMessages) {
+ if (message.role !== 'tool' || !Array.isArray(message.content)) continue
+ for (const part of message.content) {
+ if (part.type === 'json' && part.value) {
+ toolResults.push(part.value)
+ }
+ }
+ }
+
+ // Concatenate all unified diffs for the selector to review
+ const unifiedDiffs = toolResults
+ .filter((result: any) => result.unifiedDiff)
+ .map((result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`)
+ .join('\n\n')
+
+ yield {
+ toolName: 'set_output',
+ input: {
+ toolCalls,
+ toolResults,
+ unifiedDiffs,
+ },
+ includeToolCall: false,
+ }
},
}
}
const definition = {
- ...createBestOfNImplementor({ model: 'sonnet' }),
+ ...createBestOfNImplementor({ model: 'opus' }),
id: 'editor-implementor',
}
export default definition
diff --git a/.agents/editor/best-of-n/editor-multi-prompt.ts b/.agents/editor/best-of-n/editor-multi-prompt.ts
index 3beb910095..41634081a5 100644
--- a/.agents/editor/best-of-n/editor-multi-prompt.ts
+++ b/.agents/editor/best-of-n/editor-multi-prompt.ts
@@ -1,15 +1,12 @@
import { publisher } from '../../constants'
-import type {
- AgentStepContext,
- StepText,
- ToolCall,
-} from '../../types/agent-definition'
+import type { AgentStepContext, ToolCall } from '../../types/agent-definition'
import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
/**
* Creates a multi-prompt editor agent that spawns one implementor per prompt.
* Each prompt specifies a slightly different implementation strategy/approach.
+ * Uses propose_* tools to draft changes, then applies the chosen implementation.
*/
export function createMultiPromptEditor(): Omit {
return {
@@ -17,7 +14,7 @@ export function createMultiPromptEditor(): Omit {
model: 'anthropic/claude-opus-4.5',
displayName: 'Multi-Prompt Editor',
spawnerPrompt:
- 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. Pass an array of short prompts specifying different implementation approaches. Make sure to read any files intended to be edited before spawning this agent.',
+ 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.',
includeMessageHistory: true,
inheritParentSystemPrompt: true,
@@ -30,7 +27,7 @@ export function createMultiPromptEditor(): Omit {
'set_output',
],
spawnableAgents: [
- 'best-of-n-selector-opus',
+ 'best-of-n-selector2',
'editor-implementor-opus',
'editor-implementor-gpt-5',
],
@@ -58,7 +55,6 @@ export function createMultiPromptEditor(): Omit {
function* handleStepsMultiPrompt({
agentState,
params,
- logger,
}: AgentStepContext): ReturnType<
NonNullable
> {
@@ -111,31 +107,47 @@ function* handleStepsMultiPrompt({
includeToolCall: false,
} satisfies ToolCall<'spawn_agents'>
- // Extract spawn results
- const spawnedImplementations = extractSpawnResults(
- implementorResults,
- ) as any[]
+ // Extract spawn results - each is structured output with { toolCalls, toolResults, unifiedDiffs }
+ const spawnedImplementations = extractSpawnResults<{
+ toolCalls: { toolName: string; input: any }[]
+ toolResults: any[]
+ unifiedDiffs: string
+ }>(implementorResults)
- // Extract all the implementations from the results
+ // Build implementations for selector using the unified diffs
const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- const strategies = [...prompts, prompts[0]]
- const implementations = spawnedImplementations.map((result, index) => ({
- id: letters[index],
- strategy: strategies[index],
- content:
- 'errorMessage' in result
- ? `Error: ${result.errorMessage}`
- : extractLastMessageText(result) ?? '',
- }))
-
- // Spawn selector with implementations as params
- const { toolResult: selectorResult, agentState: selectorAgentState } = yield {
+ const implementations = spawnedImplementations.map((result, index) => {
+ if (!result || (typeof result === 'object' && 'errorMessage' in result)) {
+ return {
+ id: letters[index],
+ strategy: prompts[index] ?? 'unknown',
+ content: `Error: ${(result as any)?.errorMessage ?? 'Unknown error'}`,
+ toolCalls: [] as { toolName: string; input: any }[],
+ }
+ }
+
+ return {
+ id: letters[index],
+ strategy: prompts[index] ?? 'unknown',
+ content: result.unifiedDiffs || 'No changes proposed',
+ toolCalls: result.toolCalls ?? [],
+ }
+ })
+
+ // Spawn selector with implementations (showing unified diffs for review)
+ const { toolResult: selectorResult } = yield {
toolName: 'spawn_agents',
input: {
agents: [
{
- agent_type: 'best-of-n-selector-opus',
- params: { implementations },
+ agent_type: 'best-of-n-selector2',
+ params: {
+ implementations: implementations.map((impl) => ({
+ id: impl.id,
+ strategy: impl.strategy,
+ content: impl.content,
+ })),
+ },
},
],
},
@@ -143,45 +155,66 @@ function* handleStepsMultiPrompt({
} satisfies ToolCall<'spawn_agents'>
const selectorOutput = extractSpawnResults<{
- value: {
- implementationId: string
- reasoning: string
- }
+ implementationId: string
+ reason: string
+ suggestedImprovements: string
}>(selectorResult)[0]
- if ('errorMessage' in selectorOutput) {
+ if (!selectorOutput || !('implementationId' in selectorOutput)) {
yield {
toolName: 'set_output',
- input: { error: selectorOutput.errorMessage },
+ input: { error: 'Selector failed to return an implementation' },
} satisfies ToolCall<'set_output'>
return
}
- const { implementationId } = selectorOutput.value
+
+ const { implementationId } = selectorOutput
const chosenImplementation = implementations.find(
(implementation) => implementation.id === implementationId,
)
+
if (!chosenImplementation) {
yield {
toolName: 'set_output',
- input: { error: 'Failed to find chosen implementation.' },
+ input: {
+ error: `Failed to find chosen implementation: ${implementationId}`,
+ },
} satisfies ToolCall<'set_output'>
return
}
- const numMessagesBeforeStepText = selectorAgentState.messageHistory.length
+ // Apply the chosen implementation's tool calls as real edits
+ const appliedToolResults: any[] = []
+ for (const toolCall of chosenImplementation.toolCalls) {
+ // Convert propose_* tool calls to real edit tool calls
+ const realToolName =
+ toolCall.toolName === 'propose_str_replace'
+ ? 'str_replace'
+ : toolCall.toolName === 'propose_write_file'
+ ? 'write_file'
+ : toolCall.toolName
+
+ if (realToolName === 'str_replace' || realToolName === 'write_file') {
+ const { toolResult } = yield {
+ toolName: realToolName,
+ input: toolCall.input,
+ includeToolCall: true,
+ } satisfies ToolCall<'str_replace'> | ToolCall<'write_file'>
+
+ appliedToolResults.push(toolResult)
+ }
+ }
- const { agentState: postEditsAgentState } = yield {
- type: 'STEP_TEXT',
- text: chosenImplementation.content,
- } as StepText
- const { messageHistory } = postEditsAgentState
+ // Extract suggested improvements from selector output
+ const { suggestedImprovements } = selectorOutput
- // Set output with the messages from running the step text of the chosen implementation
+ // Set output with the applied results and suggested improvements
yield {
toolName: 'set_output',
input: {
chosenStrategy: chosenImplementation.strategy,
- messages: messageHistory.slice(numMessagesBeforeStepText),
+ toolResults: appliedToolResults,
+ suggestedImprovements,
},
includeToolCall: false,
} satisfies ToolCall<'set_output'>
@@ -199,31 +232,12 @@ function* handleStepsMultiPrompt({
? jsonResult.value
: [jsonResult.value]
- return spawnedResults.map((result: any) => result?.value).filter(Boolean)
- }
-
- /**
- * Extracts the text content from a 'lastMessage' AgentOutput.
- */
- function extractLastMessageText(agentOutput: any): string | null {
- if (!agentOutput) return null
-
- if (
- agentOutput.type === 'lastMessage' &&
- Array.isArray(agentOutput.value)
- ) {
- for (let i = agentOutput.value.length - 1; i >= 0; i--) {
- const message = agentOutput.value[i]
- if (message.role === 'assistant' && Array.isArray(message.content)) {
- for (const part of message.content) {
- if (part.type === 'text' && typeof part.text === 'string') {
- return part.text
- }
- }
- }
- }
- }
- return null
+ return spawnedResults
+ .map((result: any) => result?.value)
+ .map((result: any) =>
+ result && 'value' in result ? result.value : result,
+ )
+ .filter(Boolean)
}
}
diff --git a/.agents/editor/editor.ts b/.agents/editor/editor.ts
index 98a3c4639e..cf7011daca 100644
--- a/.agents/editor/editor.ts
+++ b/.agents/editor/editor.ts
@@ -103,44 +103,19 @@ More style notes:
Write out your complete implementation now, formatting all changes as tool calls as shown above.`,
- handleSteps: function* ({ agentState: initialAgentState }) {
+ handleSteps: function* ({ agentState: initialAgentState, logger }) {
const initialMessageHistoryLength =
initialAgentState.messageHistory.length
const { agentState } = yield 'STEP'
const { messageHistory } = agentState
const newMessages = messageHistory.slice(initialMessageHistoryLength)
- const assistantText = newMessages
- .filter((message) => message.role === 'assistant')
- .flatMap((message) => message.content)
- .filter((content) => content.type === 'text')
- .map((content) => content.text)
- .join('\n')
-
- // Extract tool calls from the assistant text
- const toolCallsText = extractToolCallsOnly(assistantText)
-
- const { agentState: postAssistantTextAgentState } = yield {
- type: 'STEP_TEXT',
- text: toolCallsText,
- } as StepText
-
- const postAssistantTextMessageHistory =
- postAssistantTextAgentState.messageHistory.slice(
- initialMessageHistoryLength,
- )
- const toolResults = postAssistantTextMessageHistory
- .filter((message) => message.role === 'tool')
- .flatMap((message) => message.content)
- .filter((content) => content.type === 'json')
- .map((content) => content.value)
yield {
toolName: 'set_output',
input: {
output: {
- message: toolCallsText,
- toolResults,
+ messages: newMessages,
},
},
includeToolCall: false,
diff --git a/cli/src/components/message-block.tsx b/cli/src/components/message-block.tsx
index 3ead6bac45..df51303a0e 100644
--- a/cli/src/components/message-block.tsx
+++ b/cli/src/components/message-block.tsx
@@ -647,11 +647,8 @@ const AgentBranchWrapper = memo(
selectedAgent.agentType,
siblingBlocks,
)
- const name = getImplementorDisplayName(
- selectedAgent.agentType,
- index,
- )
- statusText = `Selected ${name}`
+ // Just show "Selected Strategy #N" without repeating the prompt text
+ statusText = index !== undefined ? `Selected Strategy #${index + 1}` : 'Selected'
reason = lastBlock?.input?.reason
}
}
@@ -706,6 +703,8 @@ const AgentBranchWrapper = memo(
const displayName = getImplementorDisplayName(
agentBlock.agentType,
implementorIndex,
+ agentBlock.initialPrompt,
+ availableWidth,
)
const statusIndicator = isStreaming
? '●'
diff --git a/cli/src/components/tools/registry.ts b/cli/src/components/tools/registry.ts
index 1098896904..fd6c9548d3 100644
--- a/cli/src/components/tools/registry.ts
+++ b/cli/src/components/tools/registry.ts
@@ -37,6 +37,9 @@ const toolComponentRegistry = new Map([
[SuggestFollowupsComponent.toolName, SuggestFollowupsComponent],
[WriteFileComponent.toolName, WriteFileComponent],
[TaskCompleteComponent.toolName, TaskCompleteComponent],
+ // Propose tools reuse the same rendering as their base counterparts
+ ['propose_str_replace', StrReplaceComponent],
+ ['propose_write_file', WriteFileComponent],
])
/**
diff --git a/cli/src/utils/implementor-helpers.ts b/cli/src/utils/implementor-helpers.ts
index 70c5fddf6b..2d6fd324b4 100644
--- a/cli/src/utils/implementor-helpers.ts
+++ b/cli/src/utils/implementor-helpers.ts
@@ -22,11 +22,34 @@ export const isImplementorAgent = (agentType: string): boolean => {
/**
* Get the display name for an implementor agent
+ * When both an index and a prompt are provided, shows "Strategy #N: [prompt]" format
+ * Otherwise falls back to model-based naming like "Opus #1"
*/
export const getImplementorDisplayName = (
agentType: string,
index?: number,
+ prompt?: string,
+ availableWidth?: number,
): string => {
+ // If we have both an index and a prompt, show "Strategy #N: [prompt]"
+ if (index !== undefined && prompt?.trim()) {
+ // Strip "Strategy: " prefix if present (added by editor-multi-prompt)
+ const cleanPrompt = prompt.startsWith('Strategy: ')
+ ? prompt.slice('Strategy: '.length)
+ : prompt
+ // Calculate max prompt length based on terminal width
+ const prefixLength = `Strategy #${index + 1}: `.length + 2 // +2 for status indicator
+ const margin = 12
+ const maxLength = availableWidth
+ ? Math.max(20, availableWidth - prefixLength - margin)
+ : 40
+ const displayPrompt =
+ cleanPrompt.length > maxLength
+ ? cleanPrompt.slice(0, maxLength) + '...'
+ : cleanPrompt
+ return `Strategy #${index + 1}: ${displayPrompt}`
+ }
+
let baseName = 'Implementor'
// Check most specific patterns first (editor-implementor2-* with model suffix)
if (agentType.includes('editor-implementor2-gpt-5')) {
diff --git a/cli/src/utils/sdk-event-handlers.ts b/cli/src/utils/sdk-event-handlers.ts
index 59216d2539..437e0e97e1 100644
--- a/cli/src/utils/sdk-event-handlers.ts
+++ b/cli/src/utils/sdk-event-handlers.ts
@@ -22,7 +22,6 @@ import {
} from './spawn-agent-matcher'
import {
destinationFromChunkEvent,
- destinationFromTextEvent,
processTextChunk,
} from './stream-chunk-processor'
@@ -162,40 +161,6 @@ const updateStreamingAgents = (
})
}
-const handleTextEvent = (state: EventHandlerState, event: PrintModeText) => {
- if (!event.text) {
- return
- }
-
- ensureStreaming(state)
-
- const destination = destinationFromTextEvent(event)
- const text = event.text
-
- if (destination.type === 'agent') {
- const previous =
- state.streaming.streamRefs.state.agentStreamAccumulators.get(
- destination.agentId,
- ) ?? ''
- state.streaming.streamRefs.setters.setAgentAccumulator(
- destination.agentId,
- previous + text,
- )
- state.message.updater.updateAiMessageBlocks((blocks) =>
- processTextChunk(blocks, destination, text),
- )
- return
- }
-
- if (state.streaming.streamRefs.state.rootStreamSeen) {
- return
- }
-
- state.streaming.streamRefs.setters.appendRootStreamBuffer(text)
- state.streaming.streamRefs.setters.setRootStreamSeen(true)
- appendRootChunk(state, { type: destination.textType, text })
-}
-
const handleSubagentStart = (
state: EventHandlerState,
event: PrintModeSubagentStart,
@@ -483,16 +448,6 @@ export const createStreamChunkHandler =
return
}
- const previous =
- state.streaming.streamRefs.state.agentStreamAccumulators.get(
- destination.agentId,
- ) ?? ''
-
- state.streaming.streamRefs.setters.setAgentAccumulator(
- destination.agentId,
- previous + text,
- )
-
state.message.updater.updateAiMessageBlocks((blocks) =>
processTextChunk(blocks, destination, text),
)
diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts
index bcf3138c06..123a4e0d8e 100644
--- a/common/src/tools/constants.ts
+++ b/common/src/tools/constants.ts
@@ -61,6 +61,8 @@ export const publishedTools = [
'glob',
'list_directory',
'lookup_agent_info',
+ 'propose_str_replace',
+ 'propose_write_file',
'read_docs',
'read_files',
'read_subtree',
diff --git a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
index ef73368e6e..6f0f480ef0 100644
--- a/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
+++ b/packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts
@@ -71,6 +71,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -137,6 +138,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -213,6 +215,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -293,6 +296,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
// consume stream
}
@@ -361,6 +365,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -433,6 +438,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -486,6 +492,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
@@ -532,6 +539,7 @@ describe('processStreamWithTags', () => {
defaultProcessor,
onError,
onResponseChunk,
+ executeXmlToolCall: async () => {},
})) {
if (chunk.type === 'text') {
result.push(chunk.text)
diff --git a/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts
new file mode 100644
index 0000000000..978e8b9004
--- /dev/null
+++ b/packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts
@@ -0,0 +1,226 @@
+import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
+import { beforeEach, describe, expect, it } from 'bun:test'
+
+import { processStreamWithTools } from '../tool-stream-parser'
+
+import type { AgentRuntimeDeps } from '@codebuff/common/types/contracts/agent-runtime'
+import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
+
+describe('XML tool result ordering', () => {
+ async function* createMockStream(chunks: StreamChunk[]) {
+ for (const chunk of chunks) {
+ yield chunk
+ }
+ return 'mock-message-id'
+ }
+
+ function textChunk(text: string): StreamChunk {
+ return { type: 'text' as const, text }
+ }
+
+ let agentRuntimeImpl: AgentRuntimeDeps
+
+ beforeEach(() => {
+ agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL }
+ })
+
+ it('should call executeXmlToolCall synchronously and track execution order', async () => {
+ // This test verifies the execution order when XML tool calls are parsed
+ const executionOrder: string[] = []
+
+ // Stream with XML tool call embedded in text
+ const xmlToolCall = `
+{"cb_tool_name": "test_tool", "param1": "value1"}
+`
+
+ const streamChunks: StreamChunk[] = [
+ textChunk('Text before tool call\n'),
+ textChunk(xmlToolCall),
+ textChunk('\nText after tool call'),
+ ]
+
+ const stream = createMockStream(streamChunks)
+ const responseChunks: any[] = []
+
+ function onResponseChunk(chunk: any) {
+ responseChunks.push(chunk)
+ }
+
+ function defaultProcessor(toolName: string) {
+ return {
+ onTagStart: () => {},
+ onTagEnd: () => {},
+ }
+ }
+
+ for await (const chunk of processStreamWithTools({
+ ...agentRuntimeImpl,
+ stream,
+ processors: {},
+ defaultProcessor,
+ onError: () => {},
+ onResponseChunk,
+ executeXmlToolCall: async ({ toolName, input }) => {
+ executionOrder.push(`executeXmlToolCall:${toolName}`)
+ // Simulate some async work (like tool execution)
+ await new Promise((resolve) => setTimeout(resolve, 10))
+ executionOrder.push(`executeXmlToolCall:${toolName}:done`)
+ },
+ })) {
+ if (chunk.type === 'text') {
+ executionOrder.push(`text:${chunk.text.trim().slice(0, 20)}`)
+ } else if (chunk.type === 'tool-call') {
+ executionOrder.push(`tool-call:${chunk.toolName}`)
+ }
+ }
+
+ // The key assertion: executeXmlToolCall should complete BEFORE "Text after" is yielded
+ // because the stream should wait for the tool to finish
+ console.log('Execution order:', executionOrder)
+
+ const executeStartIndex = executionOrder.findIndex((e) =>
+ e.startsWith('executeXmlToolCall:test_tool'),
+ )
+ const executeDoneIndex = executionOrder.findIndex((e) =>
+ e.includes(':done'),
+ )
+ const textAfterIndex = executionOrder.findIndex((e) =>
+ e.includes('Text after'),
+ )
+
+ expect(executeStartIndex).toBeGreaterThan(-1)
+ expect(executeDoneIndex).toBeGreaterThan(-1)
+
+ // The tool execution should complete before "Text after" is processed
+ if (textAfterIndex > -1) {
+ expect(executeDoneIndex).toBeLessThan(textAfterIndex)
+ }
+ })
+
+ it('should track tool_call and tool_result events in correct order', async () => {
+ // This test simulates what happens in the full processStream flow
+ // where we capture both tool_call and tool_result events
+
+ const events: { type: string; toolName?: string; order: number }[] = []
+ let eventCounter = 0
+
+ const xmlToolCall = `
+{"cb_tool_name": "read_files", "paths": ["test.ts"]}
+`
+
+ const streamChunks: StreamChunk[] = [
+ textChunk('Before\n'),
+ textChunk(xmlToolCall),
+ textChunk('\nAfter'),
+ ]
+
+ const stream = createMockStream(streamChunks)
+
+ function defaultProcessor(toolName: string) {
+ return {
+ onTagStart: () => {},
+ onTagEnd: () => {},
+ }
+ }
+
+ // Simulate the xmlToolResponseHandler behavior
+ function onResponseChunk(chunk: any) {
+ if (chunk.type === 'text') {
+ events.push({ type: 'text', order: eventCounter++ })
+ }
+ }
+
+ for await (const chunk of processStreamWithTools({
+ ...agentRuntimeImpl,
+ stream,
+ processors: {},
+ defaultProcessor,
+ onError: () => {},
+ onResponseChunk,
+ executeXmlToolCall: async ({ toolName }) => {
+ // Simulate tool_call event
+ events.push({ type: 'tool_call', toolName, order: eventCounter++ })
+
+ // Simulate async tool execution
+ await new Promise((resolve) => setTimeout(resolve, 5))
+
+ // Simulate tool_result event
+ events.push({ type: 'tool_result', toolName, order: eventCounter++ })
+ },
+ })) {
+ // Consume stream
+ }
+
+ // Find the tool_call / tool_result events and any text events recorded after the tool call
+ const toolCallEvent = events.find((e) => e.type === 'tool_call')
+ const toolResultEvent = events.find((e) => e.type === 'tool_result')
+ const textAfterEvents = events.filter(
+ (e) => e.type === 'text' && e.order > (toolCallEvent?.order ?? 0),
+ )
+
+ expect(toolCallEvent).toBeDefined()
+ expect(toolResultEvent).toBeDefined()
+
+ // The tool_result should come immediately after tool_call,
+ // before any subsequent text events
+ if (toolResultEvent && textAfterEvents.length > 0) {
+ const firstTextAfter = textAfterEvents[0]
+ expect(toolResultEvent.order).toBeLessThan(firstTextAfter.order)
+ }
+ })
+
+ it('should not deadlock when executeXmlToolCall awaits tool execution', async () => {
+ // This test verifies that awaiting inside executeXmlToolCall doesn't cause a deadlock.
+ // The fix: pass Promise.resolve() instead of previousToolCallFinished for XML mode,
+ // so the tool can execute immediately without waiting for the stream to finish.
+
+ const xmlToolCall = `
+{"cb_tool_name": "test_tool", "param": "value"}
+`
+
+ const streamChunks: StreamChunk[] = [
+ textChunk('Before\n'),
+ textChunk(xmlToolCall),
+ textChunk('\nAfter'),
+ ]
+
+ const stream = createMockStream(streamChunks)
+ let toolExecuted = false
+
+ // This test should complete within a reasonable time.
+ // Before the fix, it would deadlock because:
+ // 1. executeXmlToolCall awaits toolPromise
+ // 2. toolPromise chains on previousToolCallFinished (streamDonePromise)
+ // 3. streamDonePromise only resolves when stream ends
+ // 4. Stream can't end because it's waiting for executeXmlToolCall
+ // => Deadlock!
+
+ const timeoutPromise = new Promise<'timeout'>((resolve) =>
+ setTimeout(() => resolve('timeout'), 1000),
+ )
+
+ const streamPromise = (async () => {
+ for await (const chunk of processStreamWithTools({
+ ...agentRuntimeImpl,
+ stream,
+ processors: {},
+ defaultProcessor: () => ({ onTagStart: () => {}, onTagEnd: () => {} }),
+ onError: () => {},
+ onResponseChunk: () => {},
+ executeXmlToolCall: async () => {
+ // Simulate tool execution with async work
+ await new Promise((resolve) => setTimeout(resolve, 50))
+ toolExecuted = true
+ },
+ })) {
+ // Consume stream
+ }
+ return 'completed'
+ })()
+
+ const result = await Promise.race([streamPromise, timeoutPromise])
+
+ expect(result).toBe('completed')
+ expect(toolExecuted).toBe(true)
+ })
+})
diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts
index 2f096695dc..546babe462 100644
--- a/packages/agent-runtime/src/tool-stream-parser.ts
+++ b/packages/agent-runtime/src/tool-stream-parser.ts
@@ -1,5 +1,11 @@
import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
+import {
+ createStreamParserState,
+ parseStreamChunk,
+} from './util/stream-xml-parser'
+
+import type { StreamParserState } from './util/stream-xml-parser'
import type { Model } from '@codebuff/common/old-constants'
import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
@@ -31,6 +37,11 @@ export async function* processStreamWithTools(params: {
agentName?: string
}
trackEvent: TrackEventFn
+ executeXmlToolCall: (params: {
+ toolCallId: string
+ toolName: string
+ input: Record
+ }) => Promise
}): AsyncGenerator {
const {
stream,
@@ -41,11 +52,15 @@ export async function* processStreamWithTools(params: {
logger,
loggerOptions,
trackEvent,
+ executeXmlToolCall,
} = params
let streamCompleted = false
let buffer = ''
let autocompleted = false
+ // State for parsing XML tool calls from text stream
+ const xmlParserState: StreamParserState = createStreamParserState()
+
function processToolCallObject(params: {
toolName: string
input: any
@@ -83,9 +98,9 @@ export async function* processStreamWithTools(params: {
buffer = ''
}
- function* processChunk(
+ async function* processChunk(
chunk: StreamChunk | undefined,
- ): Generator {
+ ): AsyncGenerator {
if (chunk === undefined) {
flush()
streamCompleted = true
@@ -93,7 +108,38 @@ export async function* processStreamWithTools(params: {
}
if (chunk.type === 'text') {
- buffer += chunk.text
+ // Parse XML tool calls from the text stream
+ const { filteredText, toolCalls } = parseStreamChunk(
+ chunk.text,
+ xmlParserState,
+ )
+
+ if (filteredText) {
+ buffer += filteredText
+ yield {
+ type: 'text',
+ text: filteredText,
+ }
+ }
+
+ // Flush buffer before executing tool calls so text event is sent first
+ if (toolCalls.length > 0) {
+ flush()
+ }
+
+ // Then execute any XML tool calls found, one at a time in order
+ for (const toolCall of toolCalls) {
+ const toolCallId = `xml-${crypto.randomUUID().slice(0, 8)}`
+
+ // Execute the tool immediately and await it, pausing the stream until it finishes
+ // The callback handles emitting tool_call and tool_result events
+ await executeXmlToolCall({
+ toolCallId,
+ toolName: toolCall.toolName,
+ input: toolCall.input,
+ })
+ }
+ return
} else {
flush()
}
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
index 00cb52de9b..ee3b6f1dd0 100644
--- a/packages/agent-runtime/src/tools/stream-parser.ts
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -64,7 +64,11 @@ export async function processStream(
> &
ParamsExcluding<
typeof processStreamWithTools,
- 'processors' | 'defaultProcessor' | 'onError' | 'loggerOptions'
+ | 'processors'
+ | 'defaultProcessor'
+ | 'onError'
+ | 'loggerOptions'
+ | 'executeXmlToolCall'
>,
) {
const {
@@ -78,10 +82,10 @@ export async function processStream(
runId,
signal,
userId,
- logger,
} = params
const fullResponseChunks: string[] = [fullResponse]
+ // === MUTABLE STATE ===
const toolResults: ToolMessage[] = []
const toolResultsToAddAfterStream: ToolMessage[] = []
const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
@@ -98,43 +102,41 @@ export async function processStream(
firstFileProcessed: false,
}
- function toolCallback(toolName: T) {
- return {
- onTagStart: () => {},
- onTagEnd: async (_: string, input: Record) => {
- if (signal.aborted) {
- return
+ // === RESPONSE HANDLER ===
+ // Creates a response handler that captures tool events into assistantMessages.
+ // When isXmlMode=true, also captures tool_result events for interleaved ordering.
+ function createResponseHandler(isXmlMode: boolean) {
+ return (chunk: string | PrintModeEvent) => {
+ if (typeof chunk !== 'string') {
+ if (chunk.type === 'tool_call') {
+ assistantMessages.push(
+ assistantMessage({ ...chunk, type: 'tool-call' }),
+ )
+ } else if (isXmlMode && chunk.type === 'tool_result') {
+ const toolResultMessage: ToolMessage = {
+ role: 'tool',
+ toolName: chunk.toolName,
+ toolCallId: chunk.toolCallId,
+ content: chunk.output,
+ }
+ assistantMessages.push(toolResultMessage)
}
- const toolCallId = generateCompactId()
- // delegated to reusable helper
- previousToolCallFinished = executeToolCall({
- ...params,
- toolName,
- input,
- fromHandleSteps: false,
-
- fileProcessingState,
- fullResponse: fullResponseChunks.join(''),
- previousToolCallFinished,
- toolCallId,
- toolCalls,
- toolResults,
- toolResultsToAddAfterStream,
-
- onCostCalculated,
- onResponseChunk: (chunk) => {
- if (typeof chunk !== 'string' && chunk.type === 'tool_call') {
- assistantMessages.push(
- assistantMessage({ ...chunk, type: 'tool-call' }),
- )
- }
- return onResponseChunk(chunk)
- },
- })
- },
+ }
+ return onResponseChunk(chunk)
}
}
- function customToolCallback(toolName: string) {
+
+ // === TOOL EXECUTION ===
+ // Unified callback factory for both native and custom tools.
+ // isXmlMode=true: execute immediately, capture results inline (for XML tool calls)
+ // isXmlMode=false: defer execution, results added at end (for native tool calls)
+ function createToolExecutionCallback(
+ toolName: string,
+ isXmlMode: boolean,
+ ) {
+ const responseHandler = createResponseHandler(isXmlMode)
+ const resultsArray = isXmlMode ? [] : toolResultsToAddAfterStream
+
return {
onTagStart: () => {},
onTagEnd: async (_: string, input: Record) => {
@@ -142,86 +144,88 @@ export async function processStream(
return
}
const toolCallId = generateCompactId()
+ const isNativeTool = toolNames.includes(toolName as ToolName)
- // Check if this is an agent tool call - if so, transform to spawn_agents
- const transformed = tryTransformAgentToolCall({
- toolName,
- input,
- spawnableAgents: agentTemplate.spawnableAgents,
- })
+ // Check if this is an agent tool call that should be transformed to spawn_agents
+ const transformed = !isNativeTool
+ ? tryTransformAgentToolCall({
+ toolName,
+ input,
+ spawnableAgents: agentTemplate.spawnableAgents,
+ })
+ : null
- if (transformed) {
- // Use executeToolCall for spawn_agents (a native tool)
- previousToolCallFinished = executeToolCall({
+ // Read previousToolCallFinished at execution time to ensure proper sequential chaining.
+ // For XML mode, if this is the first tool call (still pointing to streamDonePromise),
+ // start with a resolved promise so we don't wait for the stream to complete.
+ const previousPromise = isXmlMode && previousToolCallFinished === streamDonePromise
+ ? Promise.resolve()
+ : previousToolCallFinished
+
+ // Determine which executor to use and with what parameters
+ let toolPromise: Promise
+ if (isNativeTool || transformed) {
+ // Use executeToolCall for native tools or transformed agent calls
+ toolPromise = executeToolCall({
...params,
- toolName: transformed.toolName,
- input: transformed.input,
+ toolName: transformed ? transformed.toolName : (toolName as ToolName),
+ input: transformed ? transformed.input : input,
fromHandleSteps: false,
-
+ skipDirectResultPush: isXmlMode,
fileProcessingState,
fullResponse: fullResponseChunks.join(''),
- previousToolCallFinished,
+ previousToolCallFinished: previousPromise,
toolCallId,
toolCalls,
toolResults,
- toolResultsToAddAfterStream,
-
+ toolResultsToAddAfterStream: resultsArray,
onCostCalculated,
- onResponseChunk: (chunk) => {
- if (typeof chunk !== 'string' && chunk.type === 'tool_call') {
- assistantMessages.push(
- assistantMessage({ ...chunk, type: 'tool-call' }),
- )
- }
- return onResponseChunk(chunk)
- },
+ onResponseChunk: responseHandler,
})
} else {
- // delegated to reusable helper for custom tools
- previousToolCallFinished = executeCustomToolCall({
+ // Use executeCustomToolCall for custom/MCP tools
+ toolPromise = executeCustomToolCall({
...params,
toolName,
input,
-
+ skipDirectResultPush: isXmlMode,
fileProcessingState,
fullResponse: fullResponseChunks.join(''),
- previousToolCallFinished,
+ previousToolCallFinished: previousPromise,
toolCallId,
toolCalls,
toolResults,
- toolResultsToAddAfterStream,
-
- onResponseChunk: (chunk) => {
- if (typeof chunk !== 'string' && chunk.type === 'tool_call') {
- assistantMessages.push(
- assistantMessage({ ...chunk, type: 'tool-call' }),
- )
- }
- return onResponseChunk(chunk)
- },
+ toolResultsToAddAfterStream: resultsArray,
+ onResponseChunk: responseHandler,
})
}
+
+ previousToolCallFinished = toolPromise
+
+ // For XML mode, await execution so results appear inline before stream continues
+ if (isXmlMode) {
+ await toolPromise
+ }
},
}
}
+ // === STREAM PROCESSING ===
const streamWithTags = processStreamWithTools({
...params,
processors: Object.fromEntries([
- ...toolNames.map((toolName) => [toolName, toolCallback(toolName)]),
+ ...toolNames.map((name) => [name, createToolExecutionCallback(name, false)]),
...Object.keys(fileContext.customToolDefinitions ?? {}).map(
- (toolName) => [toolName, customToolCallback(toolName)],
+ (name) => [name, createToolExecutionCallback(name, false)],
),
]),
- defaultProcessor: customToolCallback,
+ defaultProcessor: (name: string) => createToolExecutionCallback(name, false),
onError: (toolName, error) => {
const toolResult: ToolMessage = {
role: 'tool',
toolName,
toolCallId: generateCompactId(),
- content: jsonToolResult({
- errorMessage: error,
- }),
+ content: jsonToolResult({ errorMessage: error }),
}
toolResults.push(cloneDeep(toolResult))
toolResultsToAddAfterStream.push(cloneDeep(toolResult))
@@ -246,11 +250,21 @@ export async function processStream(
}
return onResponseChunk(chunk)
},
+ // Execute XML-parsed tool calls immediately during streaming
+ executeXmlToolCall: async ({ toolName, input }) => {
+ if (signal.aborted) {
+ return
+ }
+ const callback = createToolExecutionCallback(toolName, true)
+ await callback.onTagEnd(toolName, input as Record)
+ },
})
+ // === STREAM CONSUMPTION LOOP ===
let messageId: string | null = null
let hadToolCallError = false
const errorMessages: Message[] = []
+
while (true) {
if (signal.aborted) {
break
@@ -273,7 +287,6 @@ export async function processStream(
fullResponseChunks.push(chunk.text)
} else if (chunk.type === 'error') {
onResponseChunk(chunk)
-
hadToolCallError = true
// Collect error messages to add AFTER all tool results
// This ensures proper message ordering for Anthropic's API which requires
@@ -286,13 +299,14 @@ export async function processStream(
),
)
} else if (chunk.type === 'tool-call') {
- // Do nothing, the onResponseChunk for tool is handled in the processor
+ // Tool call handling is done in the processor's onResponseChunk
} else {
chunk satisfies never
throw new Error(`Unhandled chunk type: ${(chunk as any).type}`)
}
}
+ // === FINALIZATION ===
agentState.messageHistory = buildArray([
...expireMessages(agentState.messageHistory, 'agentStep'),
...assistantMessages,
@@ -304,7 +318,7 @@ export async function processStream(
await previousToolCallFinished
}
- // Error messages must come AFTER tool results for proper API ordering)
+ // Error messages must come AFTER tool results for proper API ordering
agentState.messageHistory.push(...errorMessages)
return {
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
index 05757d2c1b..99dd98c53f 100644
--- a/packages/agent-runtime/src/tools/tool-executor.ts
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -118,6 +118,7 @@ export type ExecuteToolCallParams = {
toolCalls: (CodebuffToolCall | CustomToolCall)[]
toolResults: ToolMessage[]
toolResultsToAddAfterStream: ToolMessage[]
+ skipDirectResultPush?: boolean
userId: string | undefined
userInputId: string
@@ -252,7 +253,7 @@ export function executeToolCall(
toolResults.push(toolResult)
- if (!excludeToolFromMessageHistory) {
+ if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
agentState.messageHistory.push(toolResult)
}
@@ -468,7 +469,7 @@ export async function executeCustomToolCall(
toolResults.push(toolResult)
- if (!excludeToolFromMessageHistory) {
+ if (!excludeToolFromMessageHistory && !params.skipDirectResultPush) {
agentState.messageHistory.push(toolResult)
}
return
diff --git a/packages/agent-runtime/src/util/__tests__/stream-xml-parser.test.ts b/packages/agent-runtime/src/util/__tests__/stream-xml-parser.test.ts
new file mode 100644
index 0000000000..825a3c96ed
--- /dev/null
+++ b/packages/agent-runtime/src/util/__tests__/stream-xml-parser.test.ts
@@ -0,0 +1,222 @@
+import { describe, expect, it } from 'bun:test'
+
+import {
+ createStreamParserState,
+ parseStreamChunk,
+} from '../stream-xml-parser'
+
+ describe('stream-xml-parser', () => {
+   // The literal markers the parser scans for. They are spelled out once so
+   // that every case below builds its input from the same start/end tags.
+   const openTag = '<codebuff_tool_call>'
+   const closeTag = '</codebuff_tool_call>'
+
+   describe('parseStreamChunk', () => {
+     it('should pass through plain text without tool calls', () => {
+       const state = createStreamParserState()
+       const result = parseStreamChunk('Hello, world!', state)
+
+       expect(result.filteredText).toBe('Hello, world!')
+       expect(result.toolCalls).toEqual([])
+     })
+
+     it('should extract a complete tool call in a single chunk', () => {
+       const state = createStreamParserState()
+       const chunk = `${openTag}\n{"cb_tool_name": "test_tool", "path": "foo.ts"}\n${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.filteredText).toBe('')
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].toolName).toBe('test_tool')
+       expect(result.toolCalls[0].input).toEqual({ path: 'foo.ts' })
+     })
+
+     it('should extract tool call and preserve text before and after', () => {
+       const state = createStreamParserState()
+       const chunk = `Before text\n${openTag}\n{"cb_tool_name": "test_tool"}\n${closeTag}\nAfter text`
+
+       const result = parseStreamChunk(chunk, state)
+
+       // Only the newline before the start tag and the one after the end tag survive.
+       expect(result.filteredText).toBe('Before text\n\nAfter text')
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].toolName).toBe('test_tool')
+     })
+
+     it('should handle tool call split across multiple chunks', () => {
+       const state = createStreamParserState()
+
+       // First chunk: start tag and partial content
+       const result1 = parseStreamChunk(`${openTag}\n{"cb_tool`, state)
+       expect(result1.filteredText).toBe('')
+       expect(result1.toolCalls).toEqual([])
+
+       // Second chunk: rest of content and end tag
+       const result2 = parseStreamChunk(
+         `_name": "test_tool"}\n${closeTag}`,
+         state,
+       )
+       expect(result2.filteredText).toBe('')
+       expect(result2.toolCalls).toHaveLength(1)
+       expect(result2.toolCalls[0].toolName).toBe('test_tool')
+     })
+
+     it('should handle partial start tag at chunk boundary', () => {
+       const state = createStreamParserState()
+
+       // First chunk ends with partial start tag: the text is emitted, the
+       // tag fragment is held back until it can be classified.
+       const result1 = parseStreamChunk('Some text<codebuff_tool', state)
+       expect(result1.filteredText).toBe('Some text')
+       expect(result1.toolCalls).toEqual([])
+
+       // Second chunk completes the start tag and carries the whole call
+       const result2 = parseStreamChunk(
+         `_call>\n{"cb_tool_name": "test"}\n${closeTag}`,
+         state,
+       )
+       expect(result2.filteredText).toBe('')
+       expect(result2.toolCalls).toHaveLength(1)
+     })
+
+     it('should handle multiple tool calls in sequence', () => {
+       const state = createStreamParserState()
+       const chunk = `${openTag}\n{"cb_tool_name": "tool_a"}\n${closeTag}\nMiddle text\n${openTag}\n{"cb_tool_name": "tool_b"}\n${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.filteredText).toBe('\nMiddle text\n')
+       expect(result.toolCalls).toHaveLength(2)
+       expect(result.toolCalls[0].toolName).toBe('tool_a')
+       expect(result.toolCalls[1].toolName).toBe('tool_b')
+     })
+
+     it('should handle empty chunks', () => {
+       const state = createStreamParserState()
+       const result = parseStreamChunk('', state)
+
+       expect(result.filteredText).toBe('')
+       expect(result.toolCalls).toEqual([])
+     })
+
+     it('should remove cb_easp from input', () => {
+       const state = createStreamParserState()
+       const chunk = `${openTag}\n{"cb_tool_name": "test", "cb_easp": true, "path": "foo.ts"}\n${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].input).toEqual({ path: 'foo.ts' })
+       expect(result.toolCalls[0].input).not.toHaveProperty('cb_easp')
+     })
+
+     it('should handle tool call without newlines after/before tags', () => {
+       const state = createStreamParserState()
+       // No newline after start tag or before end tag
+       const chunk = `${openTag}{"cb_tool_name": "test_tool"}${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.filteredText).toBe('')
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].toolName).toBe('test_tool')
+     })
+
+     it('should handle tool call with CRLF line endings', () => {
+       const state = createStreamParserState()
+       const chunk = `${openTag}\r\n{"cb_tool_name": "test_tool"}\r\n${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.filteredText).toBe('')
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].toolName).toBe('test_tool')
+     })
+
+     it('should handle tool call with extra whitespace', () => {
+       const state = createStreamParserState()
+       // Payload and end tag are indented relative to the start tag
+       const chunk = `${openTag}\n  {"cb_tool_name": "test_tool"}\n  ${closeTag}`
+
+       const result = parseStreamChunk(chunk, state)
+
+       expect(result.filteredText).toBe('')
+       expect(result.toolCalls).toHaveLength(1)
+       expect(result.toolCalls[0].toolName).toBe('test_tool')
+     })
+
+     it('should handle realistic streaming scenario with small chunks', () => {
+       const state = createStreamParserState()
+       const allChunks: string[] = []
+       const allToolCalls: any[] = []
+
+       // Simulate streaming in small chunks like a real LLM would.
+       // NOTE(review): the name of the wrapper tag around the prose was lost
+       // in extraction; any non-tool tag exercises the same pass-through path.
+       const fullText = `<analysis>\nThinking about the task...\n</analysis>\n\n${openTag}\n{"cb_tool_name": "propose_str_replace", "path": "test.ts"}\n${closeTag}`
+
+       // Stream in ~10 char chunks
+       for (let i = 0; i < fullText.length; i += 10) {
+         const chunk = fullText.slice(i, i + 10)
+         const result = parseStreamChunk(chunk, state)
+         allChunks.push(result.filteredText)
+         allToolCalls.push(...result.toolCalls)
+       }
+
+       // Tags other than the tool-call tag must come through untouched.
+       const combinedText = allChunks.join('')
+       expect(combinedText).toBe(
+         '<analysis>\nThinking about the task...\n</analysis>\n\n',
+       )
+       expect(allToolCalls).toHaveLength(1)
+       expect(allToolCalls[0].toolName).toBe('propose_str_replace')
+       expect(allToolCalls[0].input.path).toBe('test.ts')
+     })
+
+     it('should handle end tag split across chunks', () => {
+       const state = createStreamParserState()
+       const allChunks: string[] = []
+       const allToolCalls: any[] = []
+
+       // Send start tag, content, and the first two characters of the end tag
+       let result = parseStreamChunk(
+         `${openTag}\n{"cb_tool_name": "test"}\n</`,
+         state,
+       )
+       allChunks.push(result.filteredText)
+       allToolCalls.push(...result.toolCalls)
+
+       // Send rest of end tag
+       result = parseStreamChunk('codebuff_tool_call>', state)
+       allChunks.push(result.filteredText)
+       allToolCalls.push(...result.toolCalls)
+
+       expect(allToolCalls).toHaveLength(1)
+       expect(allToolCalls[0].toolName).toBe('test')
+     })
+
+     it('should handle tiny chunks (1-2 chars at a time)', () => {
+       const state = createStreamParserState()
+       const allChunks: string[] = []
+       const allToolCalls: any[] = []
+
+       const fullText = `Hi${openTag}\n{"cb_tool_name": "x"}\n${closeTag}Bye`
+
+       // Stream 2 chars at a time
+       for (let i = 0; i < fullText.length; i += 2) {
+         const chunk = fullText.slice(i, i + 2)
+         const result = parseStreamChunk(chunk, state)
+         allChunks.push(result.filteredText)
+         allToolCalls.push(...result.toolCalls)
+       }
+
+       const combinedText = allChunks.join('')
+       expect(combinedText).toBe('HiBye')
+       expect(allToolCalls).toHaveLength(1)
+       expect(allToolCalls[0].toolName).toBe('x')
+     })
+   })
+ })
\ No newline at end of file
diff --git a/packages/agent-runtime/src/util/stream-xml-parser.ts b/packages/agent-runtime/src/util/stream-xml-parser.ts
new file mode 100644
index 0000000000..ce805a30ba
--- /dev/null
+++ b/packages/agent-runtime/src/util/stream-xml-parser.ts
@@ -0,0 +1,162 @@
+/**
+ * Stateful stream XML parser that extracts tool calls from XML
+ * and filters them out of the text stream.
+ *
+ * Handles partial tags at chunk boundaries using a stateful approach.
+ */
+
+import {
+ toolNameParam,
+ toolXmlName,
+} from '@codebuff/common/tools/constants'
+
+ // Match the bare opening and closing tags only, so detection does not depend
+ // on the newlines that usually surround the JSON payload.
+ const startToolTag = `<${toolXmlName}>`
+ const endToolTag = `</${toolXmlName}>`
+
+ /** A tool invocation extracted from the text stream. */
+ export type ParsedToolCall = {
+   /** Tool name, read from the payload's `toolNameParam` field. */
+   toolName: string
+   /** Remaining payload fields, with the `toolNameParam` and `cb_easp` keys removed. */
+   input: Record<string, unknown>
+ }
+
+ /**
+  * Mutable state carried from one `parseStreamChunk` call to the next.
+  */
+ export type StreamParserState = {
+ /**
+  * Text held back from earlier chunks: the unterminated body of a tool call
+  * while `insideToolCall` is true, otherwise a trailing fragment that may turn
+  * out to be the beginning of a start tag.
+  */
+ buffer: string
+ /** True after a start tag has been consumed and before its end tag is found */
+ insideToolCall: boolean
+ }
+
+ /**
+  * What a single `parseStreamChunk` call produced.
+  */
+ export type ParseResult = {
+ /** Text from outside tool call tags that is ready to be emitted; may be empty */
+ filteredText: string
+ /** Tool calls whose end tag was found during this call, in stream order */
+ toolCalls: ParsedToolCall[]
+ }
+
+/**
+ * Creates initial parser state
+ */
+export function createStreamParserState(): StreamParserState {
+ return {
+ buffer: '',
+ insideToolCall: false,
+ }
+}
+
+/**
+ * Parses a stream chunk, extracting tool calls and filtering out the XML.
+ *
+ * @param chunk - The incoming text chunk
+ * @param state - Mutable parser state (updated in place)
+ * @returns Filtered text and any extracted tool calls
+ */
+export function parseStreamChunk(
+ chunk: string,
+ state: StreamParserState,
+): ParseResult {
+ if (!chunk) {
+ return { filteredText: '', toolCalls: [] }
+ }
+
+ // Combine buffer with new chunk
+ let text = state.buffer + chunk
+ state.buffer = ''
+
+ let filteredText = ''
+ const toolCalls: ParsedToolCall[] = []
+
+ while (text.length > 0) {
+ if (state.insideToolCall) {
+ // We're inside a tool call, look for the end tag
+ const endIndex = text.indexOf(endToolTag)
+
+ if (endIndex !== -1) {
+ // Found end tag - extract the content and parse it
+ const toolCallContent = text.slice(0, endIndex)
+ const parsedToolCall = parseToolCallContent(toolCallContent)
+ if (parsedToolCall) {
+ toolCalls.push(parsedToolCall)
+ }
+
+ text = text.slice(endIndex + endToolTag.length)
+ state.insideToolCall = false
+ } else {
+ // No end tag yet - buffer all content until we find the end tag
+ state.buffer = text
+ text = ''
+ }
+ } else {
+ // We're outside a tool call, look for start tag
+ const startIndex = text.indexOf(startToolTag)
+
+ if (startIndex !== -1) {
+ // Found start tag - emit text before it, then enter tool call
+ filteredText += text.slice(0, startIndex)
+ text = text.slice(startIndex + startToolTag.length)
+ state.insideToolCall = true
+ } else {
+ // No start tag - check if we might have a partial start tag
+ const partialStart = findPartialTagMatch(text, startToolTag)
+ if (partialStart > 0) {
+ // Emit everything except the partial tag, buffer the partial
+ filteredText += text.slice(0, -partialStart)
+ state.buffer = text.slice(-partialStart)
+ text = ''
+ } else {
+ // No partial match, emit all
+ filteredText += text
+ text = ''
+ }
+ }
+ }
+ }
+
+ return { filteredText, toolCalls }
+}
+
+/**
+ * Parse the JSON content inside a tool call tag.
+ */
+function parseToolCallContent(content: string): ParsedToolCall | null {
+ const trimmed = content.trim()
+ if (!trimmed) {
+ return null
+ }
+
+ try {
+ const parsed = JSON.parse(trimmed)
+ const toolName = parsed[toolNameParam]
+
+ if (typeof toolName !== 'string') {
+ return null
+ }
+
+ // Remove internal params from the input
+ const input = { ...parsed }
+ delete input[toolNameParam]
+ delete input['cb_easp'] // endsAgentStepParam
+
+ return { toolName, input }
+ } catch {
+ // Invalid JSON - skip
+ return null
+ }
+}
+
+/**
+ * Find if the end of `text` is a partial match for the beginning of `tag`.
+ * Returns the length of the overlap, or 0 if no overlap.
+ */
+function findPartialTagMatch(text: string, tag: string): number {
+ const maxOverlap = Math.min(text.length, tag.length - 1)
+
+ for (let len = maxOverlap; len > 0; len--) {
+ const suffix = text.slice(-len)
+ const prefix = tag.slice(0, len)
+ if (suffix === prefix) {
+ return len
+ }
+ }
+
+ return 0
+}
diff --git a/sdk/src/__tests__/tool-xml-filter.test.ts b/sdk/src/__tests__/tool-xml-filter.test.ts
deleted file mode 100644
index b88e69edae..0000000000
--- a/sdk/src/__tests__/tool-xml-filter.test.ts
+++ /dev/null
@@ -1,456 +0,0 @@
-import { endToolTag, startToolTag } from '@codebuff/common/tools/constants'
-import { describe, expect, it } from 'bun:test'
-
-import { filterXml } from '../tool-xml-filter'
-
-function getStreamValues(stream: ReturnType): {
- chunks: string[]
- finalBuffer: string
-} {
- const chunks: string[] = []
- let finalBuffer = ''
- while (true) {
- const { value, done } = stream.next()
- if (done) {
- finalBuffer = value.buffer
- break
- }
- chunks.push(value.chunk)
- }
- return { chunks, finalBuffer }
-}
-
-describe('filterXml', () => {
- describe('basic text emission', () => {
- it('should emit text that does not contain tool tags', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'Hello, world!',
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual(['Hello, world!'])
- expect(finalBuffer).toBe('')
- })
-
- it('should emit multiple chunks of plain text', () => {
- const { chunks: chunks1, finalBuffer: buffer1 } = getStreamValues(
- filterXml({
- chunk: 'First chunk ',
- buffer: '',
- }),
- )
-
- const { chunks: chunks2, finalBuffer: buffer2 } = getStreamValues(
- filterXml({
- chunk: 'second chunk',
- buffer: buffer1,
- }),
- )
-
- expect([...chunks1, ...chunks2]).toEqual(['First chunk ', 'second chunk'])
- expect(buffer2).toBe('')
- })
- })
-
- describe('complete tool calls', () => {
- it('should filter out a complete tool call in a single chunk', () => {
- const toolCall = `${startToolTag}{"cb_tool_name": "test_tool"}${endToolTag}`
-
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: toolCall,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([])
- expect(finalBuffer).toBe('')
- })
-
- it('should emit text before and after a complete tool call', () => {
- const chunk = `Before text${startToolTag}{"cb_tool_name": "test"}${endToolTag}After text`
-
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual(['Before text', 'After text'])
- expect(finalBuffer).toBe('')
- })
-
- it('should handle multiple tool calls in sequence', () => {
- const chunk = `Text1${startToolTag}{"tool": "a"}${endToolTag}Text2${startToolTag}{"tool": "b"}${endToolTag}Text3`
-
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual(['Text1', 'Text2', 'Text3'])
- expect(finalBuffer).toBe('')
- })
- })
-
- describe('partial tool calls and buffering', () => {
- it('should buffer when chunk ends with incomplete start tag', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'Some text {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'Text {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: `${startToolTag}{"tool": "test"`,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([])
- expect(finalBuffer).toBe(`${startToolTag}{"tool": "test"`)
- })
-
- it('should complete buffered tool call when receiving end tag', () => {
- // First chunk: start tag and partial content
- const { chunks: chunks1, finalBuffer: buffer1 } = getStreamValues(
- filterXml({
- chunk: `${startToolTag}{"tool":`,
- buffer: '',
- }),
- )
-
- // Second chunk: rest of content and end tag
- const { chunks: chunks2, finalBuffer: buffer2 } = getStreamValues(
- filterXml({
- chunk: ` "test"}${endToolTag}`,
- buffer: buffer1,
- }),
- )
-
- expect([...chunks1, ...chunks2]).toEqual([])
- expect(buffer2).toBe('')
- })
-
- it('should handle text split across chunks with tool call', () => {
- const { chunks: chunks1, finalBuffer: buffer1 } = getStreamValues(
- filterXml({
- chunk: 'Before',
- buffer: '',
- }),
- )
-
- const { chunks: chunks2, finalBuffer: buffer2 } = getStreamValues(
- filterXml({
- chunk: ` text${startToolTag}{"tool": "test"}${endToolTag}After`,
- buffer: buffer1,
- }),
- )
-
- expect([...chunks1, ...chunks2]).toEqual(['Before', ' text', 'After'])
- expect(buffer2).toBe('')
- })
- })
-
- describe('overlap handling', () => {
- it('should handle overlap when chunk ends with start of tag', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'Text<',
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual(['Text'])
- expect(finalBuffer).toBe('<')
- })
-
- it('should handle overlap with multiple characters', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'Text {
- // First chunk: ends with potential tag start
- const { chunks: chunks1, finalBuffer: buffer1 } = getStreamValues(
- filterXml({
- chunk: 'Text {
- it('should handle empty chunks', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: '',
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([])
- expect(finalBuffer).toBe('')
- })
-
- it('should handle chunk with only start tag', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: startToolTag,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([])
- expect(finalBuffer).toBe(startToolTag)
- })
-
- it('should handle chunk with only end tag', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: endToolTag,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([endToolTag])
- expect(finalBuffer).toBe('')
- })
-
- it('should handle malformed tool call with end tag but no start tag', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: `Some text${endToolTag}More text`,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([`Some text${endToolTag}`, 'More text'])
- expect(finalBuffer).toBe('')
- })
-
- it('should handle nested angle brackets in text', () => {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk: 'if (x < 5 && y > 3) { }',
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual(['if (x < 5 && y > 3) { }'])
- expect(finalBuffer).toBe('')
- })
-
- it('should handle very long tool call content', () => {
- const longContent = 'x'.repeat(10000)
- const chunk = `${startToolTag}${longContent}${endToolTag}`
-
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({
- chunk,
- buffer: '',
- }),
- )
-
- expect(chunks).toEqual([])
- expect(finalBuffer).toBe('')
- })
- })
-
- describe('complex streaming scenarios', () => {
- it('should handle tool call split across many small chunks', () => {
- let buffer = ''
- const allChunks: string[] = []
- const chunksList = [
- '<',
- 'codebuff',
- '_tool',
- '_call',
- '>\n',
- '{"tool',
- '": "test',
- '"}',
- '\n',
- 'codebuff',
- '_tool_call',
- '>',
- ]
-
- for (const chunk of chunksList) {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({ chunk, buffer }),
- )
- allChunks.push(...chunks)
- buffer = finalBuffer
- }
-
- expect(allChunks).toEqual([])
- expect(buffer).toBe('')
- })
-
- it('should handle interleaved text and tool calls across chunks', () => {
- let buffer = ''
- const allChunks: string[] = []
- const chunksList = [
- 'Text1',
- `${startToolTag}{"a":1}`,
- `${endToolTag}Text2`,
- `${startToolTag}{"b":2}${endToolTag}`,
- 'Text3',
- ]
-
- for (const chunk of chunksList) {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({ chunk, buffer }),
- )
- allChunks.push(...chunks)
- buffer = finalBuffer
- }
-
- expect(allChunks).toEqual(['Text1', 'Text2', 'Text3'])
- expect(buffer).toBe('')
- })
-
- it('should maintain buffer state correctly through multiple iterations', () => {
- const allChunks: string[] = []
-
- // Chunk 1: Text with partial tag
- const { chunks: chunks1, finalBuffer: buffer1 } = getStreamValues(
- filterXml({
- chunk: 'Start\ncontent${endToolTag}`,
- buffer: buffer1,
- }),
- )
- allChunks.push(...chunks2)
- expect(buffer2).toBe('')
- expect(allChunks).toEqual(['Start'])
-
- // Chunk 3: More text
- const { chunks: chunks3, finalBuffer: buffer3 } = getStreamValues(
- filterXml({
- chunk: 'End',
- buffer: buffer2,
- }),
- )
- allChunks.push(...chunks3)
- expect(allChunks).toEqual(['Start', 'End'])
- expect(buffer3).toBe('')
- })
- })
-
- describe('real-world patterns', () => {
- it('should handle typical LLM streaming with tool call', () => {
- let buffer = ''
- const allChunks: string[] = []
- const chunksList = [
- 'Let me help you with that.\n\n',
- `${startToolTag}\n`,
- '{\n',
- ' "cb_tool_name": "write_file",\n',
- ' "path": "test.ts",\n',
- ' "content": "console.log(\'hello\');"\n',
- '}\n',
- `${endToolTag}\n`,
- "I've created the file for you.",
- ]
-
- for (const chunk of chunksList) {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({ chunk, buffer }),
- )
- allChunks.push(...chunks)
- buffer = finalBuffer
- }
-
- expect(allChunks).toEqual([
- 'Let me help you with that.\n\n',
- '\n',
- "I've created the file for you.",
- ])
- expect(buffer).toBe('')
- })
-
- it('should handle multiple tool calls with explanatory text', () => {
- let buffer = ''
- const allChunks: string[] = []
- const chunksList = [
- "First, I'll read the file.\n",
- `${startToolTag}{"cb_tool_name":"read_files","paths":["file.ts"]}${endToolTag}\n`,
- "Now I'll update it.\n",
- `${startToolTag}{"cb_tool_name":"write_file","path":"file.ts","content":"new"}${endToolTag}\n`,
- 'Done!',
- ]
-
- for (const chunk of chunksList) {
- const { chunks, finalBuffer } = getStreamValues(
- filterXml({ chunk, buffer }),
- )
- allChunks.push(...chunks)
- buffer = finalBuffer
- }
-
- expect(allChunks).toEqual([
- "First, I'll read the file.\n",
- '\n',
- "Now I'll update it.\n",
- '\n',
- 'Done!',
- ])
- expect(buffer).toBe('')
- })
- })
-})
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
index 7e0b166cfe..1ae6b994d1 100644
--- a/sdk/src/run.ts
+++ b/sdk/src/run.ts
@@ -30,7 +30,6 @@ import {
RETRY_BACKOFF_MAX_DELAY_MS,
} from './retry-config'
import { initialSessionState, applyOverridesToSessionState } from './run-state'
-import { filterXml } from './tool-xml-filter'
import { changeFile } from './tools/change-file'
import { codeSearch } from './tools/code-search'
import { glob } from './tools/glob'
@@ -600,8 +599,6 @@ export async function runOnce({
}
}
- const buffers: Record = { 0: '' }
-
const onResponseChunk = async (
action: ServerAction<'response-chunk'>,
): Promise => {
@@ -633,21 +630,7 @@ export async function runOnce({
}
if (handleStreamChunk) {
- const stream = filterXml({
- chunk,
- buffer: buffers[0],
- })
- while (true) {
- const { value, done } = stream.next()
- if (done) {
- buffers[0] = value.buffer
- break
- }
-
- if (value.chunk) {
- await handleStreamChunk(value.chunk)
- }
- }
+ await handleStreamChunk(chunk)
}
}
const onSubagentResponseChunk = async (
@@ -658,24 +641,13 @@ export async function runOnce({
}
const { agentId, agentType, chunk } = action
- if (handleStreamChunk) {
- const stream = filterXml({
+ if (handleStreamChunk && chunk) {
+ await handleStreamChunk({
+ type: 'subagent_chunk',
+ agentId,
+ agentType,
chunk,
- buffer: buffers[agentId] ?? '',
})
- while (true) {
- const { value, done } = stream.next()
- if (done) {
- buffers[agentId] = value.buffer
- break
- }
- await handleStreamChunk({
- type: 'subagent_chunk',
- agentId,
- agentType,
- chunk: value.chunk,
- })
- }
}
}
diff --git a/sdk/src/tool-xml-filter.ts b/sdk/src/tool-xml-filter.ts
deleted file mode 100644
index 3402c5cc2d..0000000000
--- a/sdk/src/tool-xml-filter.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { endToolTag, startToolTag } from '@codebuff/common/tools/constants'
-import { suffixPrefixOverlap } from '@codebuff/common/util/string'
-
-export function* filterXml(params: {
- chunk: string
- buffer: string
-}): Generator<{ chunk: string }, { buffer: string }> {
- const { chunk } = params
- let { buffer } = params
-
- buffer += chunk
- let startToolTagIndex = buffer.indexOf(startToolTag)
- let endToolTagIndex = buffer.indexOf(endToolTag)
-
- while (endToolTagIndex !== -1) {
- if (startToolTagIndex > endToolTagIndex || startToolTagIndex === -1) {
- // End tag found before start tag: unexpected state, just flush to end tag
- yield { chunk: buffer.slice(0, endToolTagIndex + endToolTag.length) }
- buffer = buffer.slice(endToolTagIndex + endToolTag.length)
- startToolTagIndex = buffer.indexOf(startToolTag)
- endToolTagIndex = buffer.indexOf(endToolTag)
- continue
- }
-
- // Start tag found before end tag - tool call found
- if (startToolTagIndex > 0) {
- yield { chunk: buffer.slice(0, startToolTagIndex) }
- }
- buffer = buffer.slice(endToolTagIndex + endToolTag.length)
- startToolTagIndex = buffer.indexOf(startToolTag)
- endToolTagIndex = buffer.indexOf(endToolTag)
- continue
- } // no more end tags
-
- // cut to first start tag
- if (startToolTagIndex !== -1) {
- if (startToolTagIndex > 0) {
- yield { chunk: buffer.slice(0, startToolTagIndex) }
- }
- return { buffer: buffer.slice(startToolTagIndex) }
- }
-
- // partial start tag
- const overlap = suffixPrefixOverlap(buffer, startToolTag)
- if (overlap.length < buffer.length) {
- yield { chunk: buffer.slice(0, buffer.length - overlap.length) }
- buffer = overlap
- }
-
- return { buffer }
-}