diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts index 410c8064b..0f6582f90 100644 --- a/.agents/base2/base2.ts +++ b/.agents/base2/base2.ts @@ -54,6 +54,8 @@ export function createBase2( !isFast && 'suggest_followups', 'str_replace', 'write_file', + 'propose_str_replace', + 'propose_write_file', 'ask_user', 'set_output', ), @@ -68,7 +70,7 @@ export function createBase2( isDefault && 'thinker', isLite && 'editor-gpt-5', isDefault && 'editor', - isMax && 'editor-multi-prompt', + isMax && 'editor-multi-prompt2', isMax && 'thinker-best-of-n-opus', !isLite && 'code-reviewer', 'context-pruner', @@ -127,7 +129,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u (isDefault || isMax) && `- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem.`, isMax && - `- Spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, + `- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, '- Spawn commanders sequentially if the second command depends on the the first.', !isFast && !isLite && @@ -181,7 +183,7 @@ ${ ? '[ You implement the changes using the str_replace or write_file tools ]' : isLite ? '[ You implement the changes using the editor-gpt-5 agent ]' - : '[ You implement the changes using the editor-multi-prompt agent ]' + : '[ You implement the changes using the editor-multi-prompt2 agent ]' } ${ @@ -291,6 +293,8 @@ ${buildArray( EXPLORE_PROMPT, isMax && `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`, + isMax && + 'If needed, use the ask_user tool to ask the user for clarification on their request or alternate implementation strategies. It is good to get context on the codebase before asking questions so you can ask informed questions.', (isDefault || isMax) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, isDefault && @@ -300,7 +304,7 @@ ${buildArray( isDefault && '- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.', isMax && - `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`, + `- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`, isFast && '- Implement the changes using the str_replace or write_file tools. Implement all the changes in one go.', isFast && @@ -334,7 +338,7 @@ function buildImplementationStepPrompt({ isMax && `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, isMax && - `You must spawn the 'editor-multi-prompt' agent to implement code changes, since it will generate the best code changes.`, + `You must spawn the 'editor-multi-prompt2' agent to implement code changes, since it will generate the best code changes.`, (isDefault || isMax) && 'Spawn code-reviewer to review the changes after you have implemented the changes and in parallel with typechecking or testing.', `After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''} Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`, diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts index 3628a1006..e7d29a7bb 100644 --- a/.agents/context-pruner.ts +++ b/.agents/context-pruner.ts @@ -172,6 +172,12 @@ const definition: AgentDefinition = { if (lastInstructionsPromptIndex !== -1) { currentMessages.splice(lastInstructionsPromptIndex, 1) } + const lastSubagentSpawnIndex = currentMessages.findLastIndex((message) => + message.tags?.includes('SUBAGENT_SPAWN'), + ) + if (lastSubagentSpawnIndex !== -1) { + currentMessages.splice(lastSubagentSpawnIndex, 1) + } // Initial check - if already under limit, return const initialTokens = countMessagesTokens(currentMessages) diff --git a/.agents/editor/best-of-n/best-of-n-selector2.ts b/.agents/editor/best-of-n/best-of-n-selector2.ts new file mode 100644 index 000000000..2b7c52ead --- /dev/null +++ b/.agents/editor/best-of-n/best-of-n-selector2.ts @@ -0,0 +1,144 @@ +import { + PLACEHOLDER, + type SecretAgentDefinition, +} from '../../types/secret-agent-definition' +import { publisher } from '../../constants' + +export const createBestOfNSelector2 = (options: { + model: 'sonnet' | 'opus' | 'gpt-5' +}): Omit => { + const { model } = options + const isSonnet = model === 'sonnet' + const isOpus = model === 'opus' + const isGpt5 = model === 'gpt-5' + return { + publisher, + model: isSonnet + ? 'anthropic/claude-sonnet-4.5' + : isOpus + ? 'anthropic/claude-opus-4.5' + : 'openai/gpt-5.2', + ...(isGpt5 && { + reasoningOptions: { + effort: 'high', + }, + }), + displayName: isGpt5 + ? 'Best-of-N GPT-5 Diff Selector' + : isOpus + ? 'Best-of-N Opus Diff Selector' + : 'Best-of-N Sonnet Diff Selector', + spawnerPrompt: + 'Analyzes multiple implementation proposals (as unified diffs) and selects the best one', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: ['set_output'], + spawnableAgents: [], + + inputSchema: { + params: { + type: 'object', + properties: { + implementations: { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'string' }, + strategy: { type: 'string' }, + content: { type: 'string', description: 'Unified diff of the proposed changes' }, + }, + required: ['id', 'content'], + }, + }, + }, + required: ['implementations'], + }, + }, + outputMode: 'structured_output', + outputSchema: { + type: 'object', + properties: { + implementationId: { + type: 'string', + description: 'The id of the chosen implementation', + }, + reason: { + type: 'string', + description: + 'An extremely short (1 sentence) description of why this implementation was chosen', + }, + suggestedImprovements: { + type: 'string', + description: + 'A summary of suggested improvements from non-chosen implementations that could enhance the selected implementation. You can also include any new ideas you have to improve upon the selected implementation. Leave empty if no valuable improvements were found.', + }, + }, + required: ['implementationId', 'reason', 'suggestedImprovements'], + }, + + instructionsPrompt: `As part of the best-of-n workflow of agents, you are the implementation selector agent. + +## Task Instructions + +You have been provided with multiple implementation proposals via params. Each implementation shows a UNIFIED DIFF of the proposed changes. + +The implementations are available in the params.implementations array, where each has: +- id: A unique identifier for the implementation (A, B, C, etc.) +- strategy: The strategy/approach used for this implementation +- content: The unified diff showing what would change + +Your task is to: +1. Analyze each implementation's diff carefully, compare them against the original user requirements +2. Select the best implementation +3. Identify the best ideas/techniques from the NON-CHOSEN implementations that could improve the selected implementation + +Evaluate each based on (in order of importance): +- Correctness and completeness in fulfilling the user's request +- Simplicity and maintainability +- Code quality and adherence to project conventions +- Proper reuse of existing code (helper functions, libraries, etc.) +- Minimal changes to existing code (fewer files changed, fewer lines changed) +- Clarity and readability + +## Analyzing Non-Chosen Implementations + +After selecting the best implementation, look at each non-chosen implementation and identify any valuable aspects that could enhance the selected implementation. These might include: +- More elegant code patterns or abstractions +- Simplified logic or reuse of existing code +- Additional edge case handling +- Better naming or organization +- Useful comments or documentation +- Additional features that align with the user's request + +Only include improvements that are genuinely valuable and compatible with the selected implementation. If a non-chosen implementation has no useful improvements to offer, don't include it. + +## User Request + +For context, here is the original user request again: + +${PLACEHOLDER.USER_INPUT_PROMPT} + + +Try to select an implementation that fulfills all the requirements in the user's request. + +## Response Format + +${ + isSonnet || isOpus + ? `Use tags to write out your thoughts about the implementations as needed to pick the best implementation. IMPORTANT: You should think really really hard to make sure you pick the absolute best implementation! Also analyze the non-chosen implementations for any valuable techniques or approaches that could improve the selected one. + +Then, do not write any other explanations AT ALL. You should directly output a single tool call to set_output with the selected implementationId, short reason, and suggestedImprovements array.` + : `Output a single tool call to set_output with the selected implementationId, reason, and suggestedImprovements. Do not write anything else.` +}`, + } +} + +const definition: SecretAgentDefinition = { + ...createBestOfNSelector2({ model: 'opus' }), + id: 'best-of-n-selector2', +} + +export default definition diff --git a/.agents/editor/best-of-n/editor-implementor.ts b/.agents/editor/best-of-n/editor-implementor.ts index f2f225bbd..522110a6a 100644 --- a/.agents/editor/best-of-n/editor-implementor.ts +++ b/.agents/editor/best-of-n/editor-implementor.ts @@ -19,7 +19,7 @@ export const createBestOfNImplementor = (options: { ? 'anthropic/claude-opus-4.5' : isGemini ? 'google/gemini-3-pro-preview' - : 'openai/gpt-5.1', + : 'openai/gpt-5.2', displayName: 'Implementation Generator', spawnerPrompt: 'Generates a complete implementation plan with all code changes', diff --git a/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts b/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts new file mode 100644 index 000000000..460057bf9 --- /dev/null +++ b/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts @@ -0,0 +1,7 @@ +import { createBestOfNImplementor2 } from './editor-implementor2' + +const definition = { + ...createBestOfNImplementor2({ model: 'gpt-5' }), + id: 'editor-implementor2-gpt-5', +} +export default definition diff --git a/.agents/editor/best-of-n/editor-implementor2.ts b/.agents/editor/best-of-n/editor-implementor2.ts new file mode 100644 index 000000000..8456ed2e1 --- /dev/null +++ b/.agents/editor/best-of-n/editor-implementor2.ts @@ -0,0 +1,156 @@ +import { publisher } from '../../constants' + +import type { SecretAgentDefinition } from '../../types/secret-agent-definition' + +export const createBestOfNImplementor2 = (options: { + model: 'gpt-5' | 'opus' | 'sonnet' +}): Omit => { + const { model } = options + const isGpt5 = model === 'gpt-5' + const isOpus = model === 'opus' + return { + publisher, + model: isGpt5 + ? 'openai/gpt-5.2' + : isOpus + ? 'anthropic/claude-opus-4.5' + : 'anthropic/claude-sonnet-4.5', + displayName: isGpt5 + ? 'GPT-5 Implementation Generator v2' + : isOpus + ? 'Opus Implementation Generator v2' + : 'Sonnet Implementation Generator v2', + spawnerPrompt: + 'Generates a complete implementation using propose_* tools that draft changes without applying them', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: ['propose_write_file', 'propose_str_replace'], + spawnableAgents: [], + + inputSchema: {}, + outputMode: 'structured_output', + + instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. + +Your task is to write out ALL the code changes needed to complete the user's request. + +IMPORTANT: Use propose_str_replace and propose_write_file tools to make your edits. These tools draft changes without actually applying them - they will be reviewed first. + +You can make multiple tool calls across multiple steps to complete the implementation. + +After your edit tool calls, you can optionally mention any follow-up steps to take, like deleting a file, or a specific way to validate the changes. + +Your implementation should: +- Be complete and comprehensive +- Include all necessary changes to fulfill the user's request +- Follow the project's conventions and patterns +- Be as simple and maintainable as possible +- Reuse existing code wherever possible +- Be well-structured and organized + +More style notes: +- Extra try/catch blocks clutter the code -- use them sparingly. +- Optional arguments are code smell and worse than required arguments. +- New components often should be added to a new file, not added to an existing file. + +Write out your complete implementation now.`, + + handleSteps: function* ({ agentState: initialAgentState }) { + const initialMessageHistoryLength = + initialAgentState.messageHistory.length + + // Helper to check if a message is empty (no tool calls and empty/no text) + const isEmptyAssistantMessage = (message: any): boolean => { + if (message.role !== 'assistant' || !Array.isArray(message.content)) { + return false + } + const hasToolCalls = message.content.some( + (part: any) => part.type === 'tool-call', + ) + if (hasToolCalls) { + return false + } + // Check if all text parts are empty or there are no text parts + const textParts = message.content.filter( + (part: any) => part.type === 'text', + ) + if (textParts.length === 0) { + return true + } + return textParts.every((part: any) => !part.text || !part.text.trim()) + } + + const { agentState } = yield 'STEP_ALL' + + let postMessages = agentState.messageHistory.slice( + initialMessageHistoryLength, + ) + + // Retry if no messages or if the only message is empty (no tool calls and empty text) + if (postMessages.length === 0) { + const { agentState: postMessagesAgentState } = yield 'STEP_ALL' + postMessages = postMessagesAgentState.messageHistory.slice( + initialMessageHistoryLength, + ) + } else if ( + postMessages.length === 1 && + isEmptyAssistantMessage(postMessages[0]) + ) { + const { agentState: postMessagesAgentState } = yield 'STEP_ALL' + postMessages = postMessagesAgentState.messageHistory.slice( + initialMessageHistoryLength, + ) + } + + // Extract tool calls from assistant messages + // Handle both 'input' and 'args' property names for compatibility + const toolCalls: { toolName: string; input: any }[] = [] + for (const message of postMessages) { + if (message.role !== 'assistant' || !Array.isArray(message.content)) + continue + for (const part of message.content) { + if (part.type === 'tool-call') { + toolCalls.push({ + toolName: part.toolName, + input: part.input ?? (part as any).args ?? {}, + }) + } + } + } + + // Extract tool results (unified diffs) from tool messages + const toolResults: any[] = [] + for (const message of postMessages) { + if (message.role !== 'tool' || !Array.isArray(message.content)) continue + for (const part of message.content) { + if (part.type === 'json' && part.value) { + toolResults.push(part.value) + } + } + } + + // Concatenate all unified diffs for the selector to review + const unifiedDiffs = toolResults + .filter((result: any) => result.unifiedDiff) + .map((result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`) + .join('\n\n') + + yield { + toolName: 'set_output', + input: { + toolCalls, + toolResults, + unifiedDiffs, + }, + includeToolCall: false, + } + }, + } +} +const definition = { + ...createBestOfNImplementor2({ model: 'opus' }), + id: 'editor-implementor2', +} +export default definition diff --git a/.agents/editor/best-of-n/editor-multi-prompt.ts b/.agents/editor/best-of-n/editor-multi-prompt.ts index e76a3abea..873d751e3 100644 --- a/.agents/editor/best-of-n/editor-multi-prompt.ts +++ b/.agents/editor/best-of-n/editor-multi-prompt.ts @@ -29,7 +29,11 @@ export function createMultiPromptEditor(): Omit { 'set_messages', 'set_output', ], - spawnableAgents: ['best-of-n-selector-opus', 'editor-implementor-opus'], + spawnableAgents: [ + 'best-of-n-selector-opus', + 'editor-implementor-opus', + 'editor-implementor-gpt-5', + ], inputSchema: { params: { @@ -92,10 +96,16 @@ function* handleStepsMultiPrompt({ } satisfies ToolCall<'set_messages'> // Spawn one opus implementor per prompt - const implementorAgents = prompts.map((prompt) => ({ - agent_type: 'editor-implementor-opus', - prompt: `Strategy: ${prompt}`, - })) + const implementorAgents: { agent_type: string; prompt?: string }[] = + prompts.map((prompt) => ({ + agent_type: 'editor-implementor-opus', + prompt: `Strategy: ${prompt}`, + })) + + // Always spawn an additional gpt-5 implementor with no prompt + implementorAgents.push({ + agent_type: 'editor-implementor-gpt-5', + }) // Spawn all implementor agents const { toolResult: implementorResults } = yield { @@ -111,16 +121,12 @@ function* handleStepsMultiPrompt({ implementorResults, ) as any[] - logger.info( - { implementorResults, spawnedImplementations, prompts }, - 'spawnedImplementations', - ) - // Extract all the implementations from the results const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + const strategies = [...prompts, prompts[0]] const implementations = spawnedImplementations.map((result, index) => ({ id: letters[index], - strategy: prompts[index], + strategy: strategies[index], content: 'errorMessage' in result ? `Error: ${result.errorMessage}` diff --git a/.agents/editor/best-of-n/editor-multi-prompt2.ts b/.agents/editor/best-of-n/editor-multi-prompt2.ts new file mode 100644 index 000000000..1921c3b76 --- /dev/null +++ b/.agents/editor/best-of-n/editor-multi-prompt2.ts @@ -0,0 +1,251 @@ +import { publisher } from '../../constants' + +import type { AgentStepContext, ToolCall } from '../../types/agent-definition' +import type { SecretAgentDefinition } from '../../types/secret-agent-definition' + +/** + * Creates a multi-prompt editor agent that spawns one implementor per prompt. + * Each prompt specifies a slightly different implementation strategy/approach. + */ +export function createMultiPromptEditor(): Omit { + return { + publisher, + model: 'anthropic/claude-opus-4.5', + displayName: 'Multi-Prompt Editor', + spawnerPrompt: + 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: [ + 'spawn_agents', + 'str_replace', + 'write_file', + 'set_messages', + 'set_output', + ], + spawnableAgents: [ + 'best-of-n-selector2', + 'editor-implementor2', + 'editor-implementor2-gpt-5', + ], + + inputSchema: { + params: { + type: 'object', + properties: { + prompts: { + type: 'array', + items: { type: 'string' }, + description: + 'Array of short prompts, each specifying a slightly different implementation strategy or approach. Example: ["use a cache for the data", "don\t cache anything", "make the minimal possible changes", "modularize your solution by creating new files"]', + }, + }, + required: ['prompts'], + }, + }, + outputMode: 'structured_output', + + handleSteps: handleStepsMultiPrompt, + } +} + +function* handleStepsMultiPrompt({ + agentState, + params, +}: AgentStepContext): ReturnType< + NonNullable +> { + const prompts = (params?.prompts as string[] | undefined) ?? [] + + if (prompts.length === 0) { + yield { + toolName: 'set_output', + input: { + error: 'No prompts provided. Please pass an array of strategy prompts.', + }, + } satisfies ToolCall<'set_output'> + return + } + + // Only keep messages up to just before the last user role message (skips input prompt, instructions prompt). + const { messageHistory: initialMessageHistory } = agentState + let userMessageIndex = initialMessageHistory.length + + while (userMessageIndex > 0) { + const message = initialMessageHistory[userMessageIndex - 1] + if (message.role === 'user') { + userMessageIndex-- + } else { + break + } + } + const updatedMessageHistory = initialMessageHistory.slice(0, userMessageIndex) + yield { + toolName: 'set_messages', + input: { + messages: updatedMessageHistory, + }, + includeToolCall: false, + } satisfies ToolCall<'set_messages'> + + // Spawn one implementor2 per prompt (uses propose_* tools) + const implementorAgents: { agent_type: string; prompt?: string }[] = + prompts.map((prompt) => ({ + agent_type: 'editor-implementor2', + prompt: `Strategy: ${prompt}`, + })) + + // Always spawn an additional gpt-5 implementor first with no prompt + implementorAgents.unshift({ + agent_type: 'editor-implementor2-gpt-5', + }) + + // Spawn all implementor agents + const { toolResult: implementorResults } = yield { + toolName: 'spawn_agents', + input: { + agents: implementorAgents, + }, + includeToolCall: false, + } + + // Extract spawn results - each is structured output with { toolCalls, toolResults, unifiedDiffs } + const spawnedImplementations = extractSpawnResults<{ + toolCalls: { toolName: string; input: any }[] + toolResults: any[] + unifiedDiffs: string + }>(implementorResults) + + // Build implementations for selector using the unified diffs + const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + const strategies = [...prompts, 'default'] + const implementations = spawnedImplementations.map((result, index) => { + if (!result || 'errorMessage' in result) { + return { + id: letters[index], + strategy: strategies[index] ?? 'unknown', + content: `Error: ${result?.errorMessage ?? 'Unknown error'}`, + toolCalls: [], + } + } + + return { + id: letters[index], + strategy: strategies[index] ?? 'unknown', + content: result.unifiedDiffs ?? 'No changes proposed', + toolCalls: result.toolCalls ?? [], + } + }) + + // Spawn selector with implementations (showing unified diffs for review) + const { toolResult: selectorResult } = yield { + toolName: 'spawn_agents', + input: { + agents: [ + { + agent_type: 'best-of-n-selector2', + params: { + implementations: implementations.map((impl) => ({ + id: impl.id, + strategy: impl.strategy, + content: impl.content, + })), + }, + }, + ], + }, + includeToolCall: false, + } satisfies ToolCall<'spawn_agents'> + + const selectorOutput = extractSpawnResults<{ + implementationId: string + reason: string + suggestedImprovements: string + }>(selectorResult)[0] + + if (!selectorOutput || !selectorOutput.implementationId) { + yield { + toolName: 'set_output', + input: { error: 'Selector failed to return an implementation' }, + } satisfies ToolCall<'set_output'> + return + } + + const { implementationId } = selectorOutput + const chosenImplementation = implementations.find( + (implementation) => implementation.id === implementationId, + ) + + if (!chosenImplementation) { + yield { + toolName: 'set_output', + input: { + error: `Failed to find chosen implementation: ${implementationId}`, + }, + } satisfies ToolCall<'set_output'> + return + } + + // Apply the chosen implementation's tool calls as real edits + const appliedToolResults: any[] = [] + for (const toolCall of chosenImplementation.toolCalls) { + // Convert propose_* tool calls to real edit tool calls + const realToolName = + toolCall.toolName === 'propose_str_replace' + ? 'str_replace' + : toolCall.toolName === 'propose_write_file' + ? 'write_file' + : toolCall.toolName + + if (realToolName === 'str_replace' || realToolName === 'write_file') { + const { toolResult } = yield { + toolName: realToolName, + input: toolCall.input, + includeToolCall: true, + } satisfies ToolCall<'str_replace'> | ToolCall<'write_file'> + + appliedToolResults.push(toolResult) + } + } + + // Extract suggested improvements from selector output + const { suggestedImprovements } = selectorOutput + + // Set output with the applied results and suggested improvements + yield { + toolName: 'set_output', + input: { + chosenStrategy: chosenImplementation.strategy, + toolResults: appliedToolResults, + suggestedImprovements, + }, + includeToolCall: false, + } satisfies ToolCall<'set_output'> + + /** + * Extracts the array of subagent results from spawn_agents tool output. + */ + function extractSpawnResults(results: any[] | undefined): T[] { + if (!results || results.length === 0) return [] + + const jsonResult = results.find((r) => r.type === 'json') + if (!jsonResult?.value) return [] + + const spawnedResults = Array.isArray(jsonResult.value) + ? jsonResult.value + : [jsonResult.value] + + return spawnedResults + .map((result: any) => result?.value) + .map((result: any) => ('value' in result ? result.value : result)) + .filter(Boolean) + } +} + +const definition = { + ...createMultiPromptEditor(), + id: 'editor-multi-prompt2', +} +export default definition diff --git a/.agents/editor/editor.ts b/.agents/editor/editor.ts index 99d03721f..a0197828a 100644 --- a/.agents/editor/editor.ts +++ b/.agents/editor/editor.ts @@ -9,7 +9,7 @@ export const createCodeEditor = (options: { publisher, model: options.model === 'gpt-5' - ? 'openai/gpt-5.1' + ? 'openai/gpt-5.2' : 'anthropic/claude-opus-4.5', displayName: 'Code Editor', spawnerPrompt: @@ -20,7 +20,7 @@ export const createCodeEditor = (options: { includeMessageHistory: true, inheritParentSystemPrompt: true, - instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. + instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. Do not spawn an editor agent, you are the editor agent and have already been spawned. Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response. diff --git a/.agents/reviewer/code-reviewer-gpt-5.ts b/.agents/reviewer/code-reviewer-gpt-5.ts index 5d045d4b7..dcd97403d 100644 --- a/.agents/reviewer/code-reviewer-gpt-5.ts +++ b/.agents/reviewer/code-reviewer-gpt-5.ts @@ -4,7 +4,7 @@ import type { SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { ...codeReviewer, id: 'code-reviewer-gpt-5', - model: 'openai/gpt-5.1', + model: 'openai/gpt-5.2', } export default definition diff --git a/.agents/reviewer/code-reviewer.ts b/.agents/reviewer/code-reviewer.ts index 3115d51d6..5cbb7bc6b 100644 --- a/.agents/reviewer/code-reviewer.ts +++ b/.agents/reviewer/code-reviewer.ts @@ -25,7 +25,7 @@ export const createReviewer = ( inheritParentSystemPrompt: true, includeMessageHistory: true, - instructionsPrompt: `You are a subagent that reviews code changes. Do not use any tools. For reference, here is the original user request: + instructionsPrompt: `You are a subagent that reviews code changes and gives helpful critical feedback. Do not use any tools. For reference, here is the original user request: ${PLACEHOLDER.USER_INPUT_PROMPT} diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts index 4d47cc8c4..2c14b6e38 100644 --- a/.agents/types/tools.ts +++ b/.agents/types/tools.ts @@ -10,6 +10,8 @@ export type ToolName = | 'glob' | 'list_directory' | 'lookup_agent_info' + | 'propose_str_replace' + | 'propose_write_file' | 'read_docs' | 'read_files' | 'read_subtree' @@ -38,6 +40,8 @@ export interface ToolParamsMap { glob: GlobParams list_directory: ListDirectoryParams lookup_agent_info: LookupAgentInfoParams + propose_str_replace: ProposeStrReplaceParams + propose_write_file: ProposeWriteFileParams read_docs: ReadDocsParams read_files: ReadFilesParams read_subtree: ReadSubtreeParams @@ -149,6 +153,35 @@ export interface LookupAgentInfoParams { agentId: string } +/** + * Propose string replacements in a file without actually applying them. + */ +export interface ProposeStrReplaceParams { + /** The path to the file to edit. */ + path: string + /** Array of replacements to make. */ + replacements: { + /** The string to replace. This must be an *exact match* of the string you want to replace, including whitespace and punctuation. */ + old: string + /** The string to replace the corresponding old string with. Can be empty to delete. */ + new: string + /** Whether to allow multiple replacements of old string. */ + allowMultiple?: boolean + }[] +} + +/** + * Propose creating or editing a file without actually applying the changes. + */ +export interface ProposeWriteFileParams { + /** Path to the file relative to the **project root** */ + path: string + /** What the change is intended to do in only one sentence. */ + instructions: string + /** Edit snippet to apply to the file. */ + content: string +} + /** * Fetch up-to-date documentation for libraries and frameworks using Context7 API. */ diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 6b62b4272..e43100f6d 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -4,7 +4,10 @@ import { RETRY_BACKOFF_MAX_DELAY_MS, } from '@codebuff/sdk' -import { createEventHandler, createStreamChunkHandler } from './sdk-event-handlers' +import { + createEventHandler, + createStreamChunkHandler, +} from './sdk-event-handlers' import type { EventHandlerState } from './sdk-event-handlers' import type { AgentDefinition, MessageContent, RunState } from '@codebuff/sdk' @@ -68,15 +71,15 @@ export const createRunConfig = (params: CreateRunConfigParams) => { setIsRetrying(true) setStreamStatus('waiting') }, - onRetryExhausted: async ({ totalAttempts, errorCode }: RetryExhaustedArgs) => { - logger.warn( - { totalAttempts, errorCode }, - 'SDK exhausted all retries', - ) + onRetryExhausted: async ({ + totalAttempts, + errorCode, + }: RetryExhaustedArgs) => { + logger.warn({ totalAttempts, errorCode }, 'SDK exhausted all retries') }, }, agentDefinitions, - maxAgentSteps: 40, + maxAgentSteps: 100, handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), } diff --git a/cli/src/utils/implementor-helpers.ts b/cli/src/utils/implementor-helpers.ts index a0d91791d..cd801a113 100644 --- a/cli/src/utils/implementor-helpers.ts +++ b/cli/src/utils/implementor-helpers.ts @@ -5,6 +5,10 @@ export const IMPLEMENTOR_AGENT_IDS = [ 'editor-implementor-opus', 'editor-implementor-gemini', 'editor-implementor-gpt-5', + 'editor-implementor2', + 'editor-implementor2-opus', + 'editor-implementor2-gpt-5', + 'editor-implementor2-sonnet', ] as const /** @@ -24,12 +28,23 @@ export const getImplementorDisplayName = ( index?: number, ): string => { let baseName = 'Implementor' - if (agentType.includes('editor-implementor-opus')) { + // Check most specific patterns first (editor-implementor2-* with model suffix) + if (agentType.includes('editor-implementor2-gpt-5')) { + baseName = 'GPT-5.2' + } else if (agentType.includes('editor-implementor2-opus')) { baseName = 'Opus' - } else if (agentType.includes('editor-implementor-gemini')) { - baseName = 'Gemini' + } else if (agentType.includes('editor-implementor2-sonnet')) { + baseName = 'Sonnet' + } else if (agentType.includes('editor-implementor2')) { + // Generic editor-implementor2 defaults to Opus + baseName = 'Opus' + // Then check editor-implementor-* patterns (less specific) } else if (agentType.includes('editor-implementor-gpt-5')) { baseName = 'GPT-5' + } else if (agentType.includes('editor-implementor-opus')) { + baseName = 'Opus' + } else if (agentType.includes('editor-implementor-gemini')) { + baseName = 'Gemini' } else if (agentType.includes('editor-implementor')) { baseName = 'Sonnet' } diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts index 36aafd324..01b92e37d 100644 --- a/common/src/constants/agents.ts +++ b/common/src/constants/agents.ts @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce( {} as Record, ) -export const MAX_AGENT_STEPS_DEFAULT = 25 +export const MAX_AGENT_STEPS_DEFAULT = 100 diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts index f03af6e04..bcf3138c0 100644 --- a/common/src/tools/constants.ts +++ b/common/src/tools/constants.ts @@ -31,6 +31,8 @@ export const toolNames = [ 'glob', 'list_directory', 'lookup_agent_info', + 'propose_str_replace', + 'propose_write_file', 'read_docs', 'read_files', 'read_subtree', diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts index 1d1cadaaa..bc2157b1c 100644 --- a/common/src/tools/list.ts +++ b/common/src/tools/list.ts @@ -12,6 +12,8 @@ import { findFilesParams } from './params/tool/find-files' import { globParams } from './params/tool/glob' import { listDirectoryParams } from './params/tool/list-directory' import { lookupAgentInfoParams } from './params/tool/lookup-agent-info' +import { proposeStrReplaceParams } from './params/tool/propose-str-replace' +import { proposeWriteFileParams } from './params/tool/propose-write-file' import { readDocsParams } from './params/tool/read-docs' import { readFilesParams } from './params/tool/read-files' import { readSubtreeParams } from './params/tool/read-subtree' @@ -46,6 +48,8 @@ export const toolParams = { glob: globParams, list_directory: listDirectoryParams, lookup_agent_info: lookupAgentInfoParams, + propose_str_replace: proposeStrReplaceParams, + propose_write_file: proposeWriteFileParams, read_docs: readDocsParams, read_files: readFilesParams, read_subtree: readSubtreeParams, diff --git a/common/src/tools/params/tool/propose-str-replace.ts b/common/src/tools/params/tool/propose-str-replace.ts new file mode 100644 index 000000000..15915e7c3 --- /dev/null +++ b/common/src/tools/params/tool/propose-str-replace.ts @@ -0,0 +1,86 @@ +import z from 'zod/v4' + +import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils' + +import type { $ToolParams } from '../../constants' + +export const proposeUpdateFileResultSchema = z.union([ + z.object({ + file: z.string(), + message: z.string(), + unifiedDiff: z.string(), + }), + z.object({ + file: z.string(), + errorMessage: z.string(), + }), +]) + +const toolName = 'propose_str_replace' +const endsAgentStep = false +const inputSchema = z + .object({ + path: z + .string() + .min(1, 'Path cannot be empty') + .describe(`The path to the file to edit.`), + replacements: z + .array( + z + .object({ + old: z + .string() + .min(1, 'Old cannot be empty') + .describe( + `The string to replace. This must be an *exact match* of the string you want to replace, including whitespace and punctuation.`, + ), + new: z + .string() + .describe( + `The string to replace the corresponding old string with. Can be empty to delete.`, + ), + allowMultiple: z + .boolean() + .optional() + .default(false) + .describe( + 'Whether to allow multiple replacements of old string.', + ), + }) + .describe('Pair of old and new strings.'), + ) + .min(1, 'Replacements cannot be empty') + .describe('Array of replacements to make.'), + }) + .describe(`Propose string replacements in a file without actually applying them.`) +const description = ` +Propose edits to a file without actually applying them. Use this tool when you want to draft changes that will be reviewed before being applied. + +This tool works identically to str_replace but the changes are not written to disk. Instead, it returns the unified diff of what would change. Multiple propose calls on the same file will stack correctly. + +Example: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'path/to/file', + replacements: [ + { old: 'This is the old string', new: 'This is the new string' }, + { + old: '\nfoo:', + new: '\nbar:', + allowMultiple: true, + }, + ], + }, + endsAgentStep, +})} + `.trim() + +export const proposeStrReplaceParams = { + toolName, + endsAgentStep, + description, + inputSchema, + outputSchema: jsonToolResultSchema(proposeUpdateFileResultSchema), +} satisfies $ToolParams diff --git a/common/src/tools/params/tool/propose-write-file.ts b/common/src/tools/params/tool/propose-write-file.ts new file mode 100644 index 000000000..ab30ec056 --- /dev/null +++ b/common/src/tools/params/tool/propose-write-file.ts @@ -0,0 +1,71 @@ +import z from 'zod/v4' + +import { proposeUpdateFileResultSchema } from './propose-str-replace' +import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils' + +import type { $ToolParams } from '../../constants' + +const toolName = 'propose_write_file' +const endsAgentStep = false +const inputSchema = z + .object({ + path: z + .string() + .min(1, 'Path cannot be empty') + .describe(`Path to the file relative to the **project root**`), + instructions: z + .string() + .describe('What the change is intended to do in only one sentence.'), + content: z.string().describe(`Complete file content to write to the file.`), + }) + .describe( + `Propose creating or editing a file without actually applying the changes.`, + ) +const description = ` +Propose creating or editing a file without actually applying the changes. + +This tool works identically to write_file but the changes are not written to disk. Instead, it returns the unified diff of what would change. Each call overwrites the previous call. + +Format the \`content\` parameter with the entire content of the file. + +This tool is to be used in subagents. + +Example - Simple file creation: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'new-file.ts', + instructions: 'Prints Hello, world', + content: 'console.log("Hello, world!");', + }, + endsAgentStep, +})} + +Example - Overwriting a file: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'foo.ts', + instructions: 'Update foo function', + content: `function foo() { + doSomethingNew(); +} + +function bar() { + doSomethingOld(); +} +`, + }, + endsAgentStep, +})} +`.trim() + +export const proposeWriteFileParams = { + toolName, + endsAgentStep, + description, + inputSchema, + outputSchema: jsonToolResultSchema(proposeUpdateFileResultSchema), +} satisfies $ToolParams diff --git a/package.json b/package.json index bac28addb..d06efd581 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "cli" ], "scripts": { - "dev": "bun scripts/dev.ts", + "dev": "bun start-cli", "up": "bun scripts/start-services.ts", "start-cli": "bun --cwd cli dev", "start-db": "bun --cwd packages/internal db:start", diff --git a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts index 8c2fba940..2015f8f06 100644 --- a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts +++ b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts @@ -141,7 +141,6 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => ancestorRunIds: [], onResponseChunk: () => {}, signal: new AbortController().signal, - tools: {}, } }) diff --git a/packages/agent-runtime/src/__tests__/main-prompt.test.ts b/packages/agent-runtime/src/__tests__/main-prompt.test.ts index d60f0a63d..ab87fcbe1 100644 --- a/packages/agent-runtime/src/__tests__/main-prompt.test.ts +++ b/packages/agent-runtime/src/__tests__/main-prompt.test.ts @@ -98,7 +98,6 @@ describe('mainPrompt', () => { onResponseChunk: () => {}, localAgentTemplates: mockLocalAgentTemplates, signal: new AbortController().signal, - tools: {}, } // Mock analytics and tracing diff --git a/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts b/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts deleted file mode 100644 index cf681129b..000000000 --- a/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts +++ /dev/null @@ -1,331 +0,0 @@ -import * as bigquery from '@codebuff/bigquery' -import * as analytics from '@codebuff/common/analytics' -import { TEST_USER_ID } from '@codebuff/common/old-constants' -import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' -import { getInitialSessionState } from '@codebuff/common/types/session-state' -import * as stringUtils from '@codebuff/common/util/string' -import { - afterEach, - beforeEach, - describe, - expect, - mock, - spyOn, - test, -} from 'bun:test' - -import { createToolCallChunk, mockFileContext } from './test-utils' -import { processStream } from '../tools/stream-parser' - -import type { StreamChunk } from '@codebuff/common/types/contracts/llm' - -import type { AgentTemplate } from '../templates/types' -import type { - AgentRuntimeDeps, - AgentRuntimeScopedDeps, -} from '@codebuff/common/types/contracts/agent-runtime' -import type { ParamsOf } from '@codebuff/common/types/function-params' -import type { - Message, - ToolMessage, -} from '@codebuff/common/types/messages/codebuff-message' - -let agentRuntimeImpl: AgentRuntimeDeps = { ...TEST_AGENT_RUNTIME_IMPL } - -describe('malformed tool call error handling', () => { - let testAgent: AgentTemplate - let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps - let defaultParams: ParamsOf - - beforeEach(() => { - agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL } - - testAgent = { - id: 'test-agent', - displayName: 'Test Agent', - spawnerPrompt: 'Testing malformed tool calls', - model: 'claude-3-5-sonnet-20241022', - inputSchema: {}, - outputMode: 'all_messages' as const, - includeMessageHistory: true, - inheritParentSystemPrompt: false, - mcpServers: {}, - toolNames: ['read_files', 'end_turn'], - spawnableAgents: [], - systemPrompt: 'Test system prompt', - instructionsPrompt: 'Test instructions prompt', - stepPrompt: 'Test agent step prompt', - } - - const sessionState = getInitialSessionState(mockFileContext) - const agentState = sessionState.mainAgentState - - defaultParams = { - ...agentRuntimeImpl, - stream: createMockStream([]), - runId: 'test-run-id', - ancestorRunIds: [], - agentStepId: 'test-step', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - userInputId: 'test-input', - userId: TEST_USER_ID, - repoId: 'test-repo', - repoUrl: undefined, - agentTemplate: testAgent, - agentState, - localAgentTemplates: { 'test-agent': testAgent }, - fileContext: mockFileContext, - messages: [], - system: 'Test system prompt', - agentContext: {}, - onResponseChunk: mock(() => {}), - onCostCalculated: mock(async () => {}), - fullResponse: '', - prompt: '', - signal: new AbortController().signal, - tools: {}, - } - - // Mock analytics and tracing - spyOn(analytics, 'initAnalytics').mockImplementation(() => {}) - analytics.initAnalytics(TEST_AGENT_RUNTIME_IMPL) - spyOn(analytics, 'trackEvent').mockImplementation(() => {}) - spyOn(bigquery, 'insertTrace').mockImplementation(() => - Promise.resolve(true), - ) - - // Mock websocket actions - agentRuntimeImpl.requestFiles = async () => ({}) - agentRuntimeImpl.requestOptionalFile = async () => null - agentRuntimeImpl.requestToolCall = async () => ({ - output: [ - { - type: 'json', - value: 'Tool call success', - }, - ], - }) - - // Mock LLM APIs - agentRuntimeImpl.promptAiSdk = async function () { - return 'Test response' - } - - // Mock generateCompactId for consistent test results - spyOn(stringUtils, 'generateCompactId').mockReturnValue('test-tool-call-id') - }) - - afterEach(() => { - mock.restore() - agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL } - }) - - function createMockStream(chunks: StreamChunk[]) { - async function* generator() { - for (const chunk of chunks) { - yield chunk - } - return 'mock-message-id' - } - return generator() - } - - function textChunk(text: string): StreamChunk { - return { type: 'text' as const, text } - } - - test('should add tool result errors to message history after stream completes', async () => { - // With native tools, malformed tool calls are handled at the API level. - // This test now verifies that an unknown tool is properly handled. - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_xyz', { paths: ['test.ts'] }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - // Should have tool result errors in the final message history - const toolMessages: ToolMessage[] = - defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) - - expect(toolMessages.length).toBeGreaterThan(0) - - // Find the error tool result for the unknown tool - const errorToolResult = toolMessages.find( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorToolResult).toBeDefined() - expect( - (errorToolResult?.content?.[0] as any)?.value?.errorMessage, - ).toContain('not found') - }) - - test('should handle multiple unknown tool calls', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_1', { param: 'value1' }), - textChunk('Some text between calls'), - createToolCallChunk('unknown_tool_2', { param: 'value2' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - // Should have multiple error tool results - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - const errorMessages = toolMessages.filter( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorMessages.length).toBe(2) - }) - - test('should preserve original toolResults array alongside message history', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_xyz', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - const result = await processStream({ - ...defaultParams, - stream, - }) - - // Should have error in both toolResults and message history - expect(result.toolResults.length).toBeGreaterThan(0) - - const errorToolResult = result.toolResults.find( - (tr) => - tr.content?.[0]?.type === 'json' && - (tr.content[0] as any)?.value?.errorMessage, - ) - - expect(errorToolResult).toBeDefined() - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - expect(toolMessages.length).toBeGreaterThan(0) - }) - - test('should handle unknown tool names and add error to message history', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - const errorMessage = toolMessages.find( - (m) => - m.toolName === 'unknown_tool' && - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorMessage).toBeDefined() - expect((errorMessage?.content?.[0] as any)?.value?.errorMessage).toContain( - 'Tool unknown_tool not found', - ) - }) - - test('should not affect valid tool calls in message history', async () => { - const chunks: StreamChunk[] = [ - // Valid tool call - createToolCallChunk('read_files', { paths: ['test.ts'] }), - // Unknown tool call - createToolCallChunk('unknown_tool_xyz', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - requestFiles: async ({ filePaths }) => { - return Object.fromEntries( - filePaths.map((path) => [path, `${path} content`]), - ) - }, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - // Should have both valid and error tool results - const validResults = toolMessages.filter( - (m) => - m.toolName === 'read_files' && - !(m.content?.[0] as any)?.value?.errorMessage, - ) - - const errorResults = toolMessages.filter( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(validResults.length).toBeGreaterThan(0) - expect(errorResults.length).toBeGreaterThan(0) - }) - - test('should handle stream with only unknown tool calls', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_1', { param: 'value1' }), - createToolCallChunk('unknown_tool_2', { param: 'value2' }), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - expect(toolMessages.length).toBe(2) - toolMessages.forEach((msg) => { - expect(msg.content?.[0]?.type).toBe('json') - expect((msg.content?.[0] as any)?.value?.errorMessage).toContain( - 'not found', - ) - }) - }) -}) diff --git a/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts b/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts index 90cf1e53b..48e10960f 100644 --- a/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts +++ b/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts @@ -136,7 +136,6 @@ describe('Prompt Caching for Subagents with inheritParentSystemPrompt', () => { ancestorRunIds: [], onResponseChunk: () => {}, signal: new AbortController().signal, - tools: {}, } }) diff --git a/packages/agent-runtime/src/__tests__/propose-tools.test.ts b/packages/agent-runtime/src/__tests__/propose-tools.test.ts new file mode 100644 index 000000000..d404b3acb --- /dev/null +++ b/packages/agent-runtime/src/__tests__/propose-tools.test.ts @@ -0,0 +1,716 @@ +import { TEST_USER_ID } from '@codebuff/common/old-constants' +import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' +import { getInitialSessionState } from '@codebuff/common/types/session-state' +import { + assistantMessage, + userMessage, +} from '@codebuff/common/util/messages' +import { + afterEach, + beforeEach, + describe, + expect, + it, + mock, + spyOn, +} from 'bun:test' + +import { + clearAgentGeneratorCache, + runProgrammaticStep, +} from '../run-programmatic-step' +import { clearAllProposedContent } from '../tools/handlers/tool/proposed-content-store' +import { mockFileContext } from './test-utils' +import * as toolExecutor from '../tools/tool-executor' + +import type { AgentTemplate, StepGenerator } from '../templates/types' +import type { executeToolCall } from '../tools/tool-executor' +import type { + AgentRuntimeDeps, + AgentRuntimeScopedDeps, +} from '@codebuff/common/types/contracts/agent-runtime' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsOf } from '@codebuff/common/types/function-params' +import type { ToolMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { AgentState } from '@codebuff/common/types/session-state' + +const logger: Logger = { + debug: () => {}, + error: () => {}, + info: () => {}, + warn: () => {}, +} + +/** + * Tests for propose_str_replace and propose_write_file tools. + * These tools allow agents to propose file edits without applying them, + * returning unified diffs instead. This is useful for best-of-n editor patterns + * where multiple implementations are generated and one is selected. + */ +describe('propose_str_replace and propose_write_file tools', () => { + let mockTemplate: AgentTemplate + let mockAgentState: AgentState + let mockParams: ParamsOf + let executeToolCallSpy: ReturnType> + let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps + + // Mock file system - maps file paths to their contents + const mockFiles: Record = {} + + beforeEach(() => { + // Reset mock file system + mockFiles['src/utils.ts'] = `export function add(a: number, b: number): number { + return a + b; +} + +export function subtract(a: number, b: number): number { + return a - b; +} +` + mockFiles['src/index.ts'] = `import { add } from './utils'; +console.log(add(1, 2)); +` + + agentRuntimeImpl = { + ...TEST_AGENT_RUNTIME_IMPL, + addAgentStep: async () => 'test-agent-step-id', + sendAction: () => {}, + } + + // Mock executeToolCall to handle propose_* tools + executeToolCallSpy = spyOn( + toolExecutor, + 'executeToolCall', + ).mockImplementation(async (options: ParamsOf) => { + const { toolName, input, toolResults, agentState } = options + + if (toolName === 'propose_str_replace') { + const { path, replacements } = input as { + path: string + replacements: Array<{ old: string; new: string; allowMultiple: boolean }> + } + + // Get current content (from proposed state or mock files) + let content = mockFiles[path] ?? null + + if (content === null) { + const errorResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: { file: path, errorMessage: `File not found: ${path}` } }], + } + toolResults.push(errorResult) + agentState.messageHistory.push(errorResult) + return + } + + // Apply replacements + const errors: string[] = [] + for (const replacement of replacements) { + if (!content.includes(replacement.old)) { + errors.push(`String not found: "${replacement.old.slice(0, 50)}..."`) + continue + } + if (replacement.allowMultiple) { + content = content.replaceAll(replacement.old, replacement.new) + } else { + content = content.replace(replacement.old, replacement.new) + } + } + + if (errors.length > 0) { + const errorResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: { file: path, errorMessage: errors.join('; ') } }], + } + toolResults.push(errorResult) + agentState.messageHistory.push(errorResult) + return + } + + // Generate unified diff + const originalContent = mockFiles[path]! + const diff = generateSimpleDiff(path, originalContent, content) + + // Store proposed content for future calls + mockFiles[path] = content + + const successResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ + type: 'json', + value: { + file: path, + message: 'Proposed string replacements', + unifiedDiff: diff, + }, + }], + } + toolResults.push(successResult) + agentState.messageHistory.push(successResult) + } else if (toolName === 'propose_write_file') { + const { path, content: newContent } = input as { + path: string + instructions: string + content: string + } + + const originalContent = mockFiles[path] ?? '' + const isNewFile = !(path in mockFiles) + + // Generate unified diff + const diff = generateSimpleDiff(path, originalContent, newContent) + + // Store proposed content + mockFiles[path] = newContent + + const successResult: ToolMessage = { + role: 'tool', + toolName: 'propose_write_file', + toolCallId: `${toolName}-call-id`, + content: [{ + type: 'json', + value: { + file: path, + message: isNewFile ? `Proposed new file ${path}` : `Proposed changes to ${path}`, + unifiedDiff: diff, + }, + }], + } + toolResults.push(successResult) + agentState.messageHistory.push(successResult) + } else if (toolName === 'set_output') { + agentState.output = input + const result: ToolMessage = { + role: 'tool', + toolName: 'set_output', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: 'Output set successfully' }], + } + toolResults.push(result) + agentState.messageHistory.push(result) + } else if (toolName === 'end_turn') { + // No-op for end_turn + } + }) + + // Mock crypto.randomUUID + spyOn(crypto, 'randomUUID').mockImplementation( + () => 'mock-uuid-0000-0000-0000-000000000000' as `${string}-${string}-${string}-${string}-${string}`, + ) + + // Create mock template for implementor agent + mockTemplate = { + id: 'test-implementor', + displayName: 'Test Implementor', + spawnerPrompt: 'Testing propose tools', + model: 'claude-3-5-sonnet-20241022', + inputSchema: {}, + outputMode: 'structured_output', + includeMessageHistory: true, + inheritParentSystemPrompt: false, + mcpServers: {}, + toolNames: ['propose_str_replace', 'propose_write_file', 'set_output', 'end_turn'], + spawnableAgents: [], + systemPrompt: 'You are a code implementor that proposes changes.', + instructionsPrompt: 'Implement the requested changes using propose_str_replace or propose_write_file.', + stepPrompt: '', + handleSteps: undefined, + } as AgentTemplate + + // Create mock agent state + const sessionState = getInitialSessionState(mockFileContext) + mockAgentState = { + ...sessionState.mainAgentState, + agentId: 'test-implementor-id', + runId: 'test-run-id' as `${string}-${string}-${string}-${string}-${string}`, + messageHistory: [ + userMessage('Add a multiply function to src/utils.ts'), + assistantMessage('I will implement the changes.'), + ], + output: undefined, + directCreditsUsed: 0, + childRunIds: [], + } + + // Create mock params + mockParams = { + ...agentRuntimeImpl, + runId: 'test-run-id', + ancestorRunIds: [], + repoId: undefined, + repoUrl: undefined, + agentState: mockAgentState, + template: mockTemplate, + prompt: 'Add a multiply function to src/utils.ts', + toolCallParams: {}, + userId: TEST_USER_ID, + userInputId: 'test-user-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + onCostCalculated: async () => {}, + fileContext: mockFileContext, + localAgentTemplates: {}, + system: 'Test system prompt', + stepsComplete: false, + stepNumber: 1, + tools: {}, + logger, + signal: new AbortController().signal, + } + }) + + afterEach(() => { + mock.restore() + clearAgentGeneratorCache({ logger }) + clearAllProposedContent() + }) + + describe('propose_str_replace', () => { + it('should propose string replacement and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'export function subtract(a: number, b: number): number {\n return a - b;\n}', + new: `export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +}`, + allowMultiple: false, + }], + }, + } + toolResultsCapture.push(step.toolResult) + + const firstResult = step.toolResult?.[0] + const unifiedDiff = firstResult?.type === 'json' ? (firstResult.value as { unifiedDiff?: string })?.unifiedDiff : undefined + yield { + toolName: 'set_output', + input: { + toolCalls: [{ toolName: 'propose_str_replace', input: step }], + toolResults: step.toolResult, + unifiedDiffs: unifiedDiff ?? '', + }, + } + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + expect(executeToolCallSpy).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: 'propose_str_replace', + }), + ) + + // Verify tool result contains unified diff + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + expect(toolResult).toBeDefined() + expect(toolResult[0].type).toBe('json') + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/utils.ts') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + expect(jsonResult.value.unifiedDiff).toContain('return a * b') + }) + + it('should return error when string not found', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'nonexistent string that does not exist in the file', + new: 'replacement', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { errorMessage: string } } + expect(jsonResult.value.errorMessage).toContain('String not found') + }) + + it('should stack multiple replacements on the same file', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + // First replacement + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a + b;', + new: 'return a + b; // addition', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push({ step: 1, result: step1.toolResult }) + + // Second replacement should work on the already-modified content + const step2 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a - b;', + new: 'return a - b; // subtraction', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push({ step: 2, result: step2.toolResult }) + + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(2) + + // Both replacements should succeed + const result0 = toolResultsCapture[0].result[0] as { type: 'json'; value: { unifiedDiff: string } } + const result1 = toolResultsCapture[1].result[0] as { type: 'json'; value: { unifiedDiff: string } } + expect(result0.value.unifiedDiff).toContain('// addition') + expect(result1.value.unifiedDiff).toContain('// subtraction') + + // Final file should have both changes + expect(mockFiles['src/utils.ts']).toContain('// addition') + expect(mockFiles['src/utils.ts']).toContain('// subtraction') + }) + }) + + describe('propose_write_file', () => { + it('should propose new file creation and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_write_file', + input: { + path: 'src/multiply.ts', + instructions: 'Create multiply function', + content: `export function multiply(a: number, b: number): number { + return a * b; +} +`, + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; message: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/multiply.ts') + expect(jsonResult.value.message).toContain('new file') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + }) + + it('should propose file edit and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_write_file', + input: { + path: 'src/utils.ts', + instructions: 'Add multiply function', + content: `export function add(a: number, b: number): number { + return a + b; +} + +export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +} +`, + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; message: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/utils.ts') + expect(jsonResult.value.message).toContain('changes') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + }) + }) + + describe('implementor agent workflow', () => { + it('should receive tool results from previous tool calls across multiple steps', async () => { + /** + * This test verifies that when an agent makes multiple tool calls, + * each subsequent yield receives the tool result from the previous call. + * This is critical for the implementor2 pattern where the agent needs to + * see the unified diff results to know what changes were proposed. + */ + const receivedToolResults: any[] = [] + + const mockGenerator = (function* () { + // First tool call - propose_str_replace + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a + b;', + new: 'return a + b; // first change', + allowMultiple: false, + }], + }, + } + const step1First = step1.toolResult?.[0] + const step1HasDiff = step1First?.type === 'json' && !!(step1First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 1, + toolResult: step1.toolResult, + hasUnifiedDiff: step1HasDiff, + }) + + // Second tool call - another propose_str_replace + const step2 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a - b;', + new: 'return a - b; // second change', + allowMultiple: false, + }], + }, + } + const step2First = step2.toolResult?.[0] + const step2HasDiff = step2First?.type === 'json' && !!(step2First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 2, + toolResult: step2.toolResult, + hasUnifiedDiff: step2HasDiff, + }) + + // Third tool call - propose_write_file + const step3 = yield { + toolName: 'propose_write_file', + input: { + path: 'src/new-file.ts', + instructions: 'Create new file', + content: 'export const newFile = true;', + }, + } + const step3First = step3.toolResult?.[0] + const step3HasDiff = step3First?.type === 'json' && !!(step3First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 3, + toolResult: step3.toolResult, + hasUnifiedDiff: step3HasDiff, + }) + + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + + // Verify we received tool results for all 3 steps + expect(receivedToolResults).toHaveLength(3) + + // Step 1: Should have received tool result with unified diff + expect(receivedToolResults[0].step).toBe(1) + expect(receivedToolResults[0].toolResult).toBeDefined() + expect(receivedToolResults[0].hasUnifiedDiff).toBe(true) + const step1Result = receivedToolResults[0].toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(step1Result.value.file).toBe('src/utils.ts') + expect(step1Result.value.unifiedDiff).toContain('first change') + + // Step 2: Should have received tool result with unified diff + expect(receivedToolResults[1].step).toBe(2) + expect(receivedToolResults[1].toolResult).toBeDefined() + expect(receivedToolResults[1].hasUnifiedDiff).toBe(true) + const step2Result = receivedToolResults[1].toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(step2Result.value.file).toBe('src/utils.ts') + expect(step2Result.value.unifiedDiff).toContain('second change') + + // Step 3: Should have received tool result with unified diff for new file + expect(receivedToolResults[2].step).toBe(3) + expect(receivedToolResults[2].toolResult).toBeDefined() + expect(receivedToolResults[2].hasUnifiedDiff).toBe(true) + const step3Result = receivedToolResults[2].toolResult[0] as { type: 'json'; value: { file: string; message: string } } + expect(step3Result.value.file).toBe('src/new-file.ts') + expect(step3Result.value.message).toContain('new file') + }) + + it('should collect tool calls and results for output', async () => { + /** + * This test simulates the editor-implementor2 workflow: + * 1. Agent makes propose_* tool calls + * 2. Tool results (with unified diffs) are captured + * 3. Agent extracts tool calls and diffs for set_output + */ + // Capture tool results as they come in + const capturedToolResults: any[] = [] + const capturedToolCalls: { toolName: string; input: any }[] = [] + + const mockGenerator = (function* () { + // Make a propose_str_replace call + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'export function subtract(a: number, b: number): number {\n return a - b;\n}', + new: `export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +}`, + allowMultiple: false, + }], + }, + } + + // Capture the tool call and result + capturedToolCalls.push({ + toolName: 'propose_str_replace', + input: step1, + }) + const step1First = step1.toolResult?.[0] + if (step1First?.type === 'json' && step1First.value) { + capturedToolResults.push(step1First.value) + } + + // Generate unified diffs string from captured results + const unifiedDiffs = capturedToolResults + .filter((result: any) => result.unifiedDiff) + .map((result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`) + .join('\n\n') + + yield { + toolName: 'set_output', + input: { + toolCalls: capturedToolCalls, + toolResults: capturedToolResults, + unifiedDiffs, + }, + } + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + expect(result.agentState.output).toBeDefined() + + const output = result.agentState.output as { + toolCalls: any[] + toolResults: any[] + unifiedDiffs: string + } + + // Verify tool calls were captured + expect(output.toolCalls).toHaveLength(1) + expect(output.toolCalls[0].toolName).toBe('propose_str_replace') + + // Verify tool results were captured + expect(output.toolResults).toHaveLength(1) + expect(output.toolResults[0].file).toBe('src/utils.ts') + expect(output.toolResults[0].unifiedDiff).toContain('+export function multiply') + + // Verify unified diffs string was generated + expect(output.unifiedDiffs).toContain('--- src/utils.ts ---') + expect(output.unifiedDiffs).toContain('+export function multiply') + }) + }) +}) + +/** + * Simple diff generator for testing purposes. + * In production, the actual handlers use the 'diff' library. + */ +function generateSimpleDiff(path: string, oldContent: string, newContent: string): string { + const oldLines = oldContent.split('\n') + const newLines = newContent.split('\n') + + const diffLines: string[] = [] + const maxLen = Math.max(oldLines.length, newLines.length) + + let inChange = false + let changeStart = 0 + + for (let i = 0; i < maxLen; i++) { + const oldLine = oldLines[i] + const newLine = newLines[i] + + if (oldLine !== newLine) { + if (!inChange) { + inChange = true + changeStart = i + diffLines.push(`@@ -${i + 1},${oldLines.length - i} +${i + 1},${newLines.length - i} @@`) + } + if (oldLine !== undefined) { + diffLines.push(`-${oldLine}`) + } + if (newLine !== undefined) { + diffLines.push(`+${newLine}`) + } + } else if (inChange && oldLine === newLine) { + diffLines.push(` ${oldLine}`) + } + } + + return diffLines.join('\n') +} diff --git a/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts b/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts index cfb92f380..41c98ea92 100644 --- a/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts +++ b/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts @@ -143,7 +143,8 @@ describe('Spawn Agents Message History', () => { // Verify that the subagent's message history contains the filtered messages // expireMessages filters based on timeToLive property, not role // Since the system message doesn't have timeToLive, it will be included - expect(capturedSubAgentState.messageHistory).toHaveLength(4) // System + user + assistant messages + // System + user + assistant messages + spawn message + expect(capturedSubAgentState.messageHistory).toHaveLength(5) // Verify system message is included (because it has no timeToLive property) const systemMessages = capturedSubAgentState.messageHistory.filter( @@ -173,6 +174,14 @@ describe('Spawn Agents Message History', () => { (msg: any) => msg.content[0]?.text === 'How are you?', ), ).toBeTruthy() + + // Verify the subagent spawn message is included with proper structure + const spawnMessage = capturedSubAgentState.messageHistory.find( + (msg: any) => msg.tags?.includes('SUBAGENT_SPAWN'), + ) + expect(spawnMessage).toBeTruthy() + expect(spawnMessage.role).toBe('user') + expect(spawnMessage.content[0]?.text).toContain('Subagent child-agent has been spawned') }) it('should not include conversation history when includeMessageHistory is false', async () => { @@ -215,8 +224,15 @@ describe('Spawn Agents Message History', () => { toolCall, }) - // Verify that the subagent's message history is empty when there are no messages to pass - expect(capturedSubAgentState.messageHistory).toHaveLength(0) + // Verify that the subagent's message history contains only the spawn message + // when includeMessageHistory is true (even with empty parent history) + expect(capturedSubAgentState.messageHistory).toHaveLength(1) + + // Verify the spawn message structure + const spawnMessage = capturedSubAgentState.messageHistory[0] + expect(spawnMessage.role).toBe('user') + expect(spawnMessage.tags).toContain('SUBAGENT_SPAWN') + expect(spawnMessage.content[0]?.text).toContain('Subagent child-agent has been spawned') }) it('should handle message history with only system messages', async () => { @@ -240,10 +256,17 @@ describe('Spawn Agents Message History', () => { // Verify that system messages without timeToLive are included // expireMessages only filters messages with timeToLive='userPrompt' - expect(capturedSubAgentState.messageHistory).toHaveLength(2) + // Plus 1 for the subagent spawn message + expect(capturedSubAgentState.messageHistory).toHaveLength(3) const systemMessages = capturedSubAgentState.messageHistory.filter( (msg: any) => msg.role === 'system', ) expect(systemMessages).toHaveLength(2) + + // Verify spawn message is present + const spawnMessage = capturedSubAgentState.messageHistory.find( + (msg: any) => msg.tags?.includes('SUBAGENT_SPAWN'), + ) + expect(spawnMessage).toBeTruthy() }) }) diff --git a/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts b/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts index ef3ed0e7b..3fe3107a8 100644 --- a/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts +++ b/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts @@ -29,6 +29,10 @@ describe('Spawn Agents Permissions', () => { typeof handleSpawnAgents, 'agentState' | 'agentTemplate' | 'localAgentTemplates' | 'toolCall' > + let handleSpawnAgentInlineBaseParams: ParamsExcluding< + typeof handleSpawnAgentInline, + 'agentState' | 'agentTemplate' | 'localAgentTemplates' | 'toolCall' + > const createMockAgent = ( id: string, @@ -67,12 +71,16 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, signal: new AbortController().signal, system: 'Test system prompt', - tools: {}, userId: TEST_USER_ID, userInputId: 'test-input', writeToClient: () => {}, } + handleSpawnAgentInlineBaseParams = { + ...handleSpawnAgentsBaseParams, + tools: {}, + } + // Mock sendSubagentChunk mockSendSubagentChunk = mock(() => {}) @@ -426,7 +434,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { thinker: childAgent }, @@ -443,7 +451,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('reviewer') // Try to spawn reviewer const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { reviewer: childAgent }, @@ -462,7 +470,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('nonexistent') const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: {}, // Empty - agent not found @@ -481,7 +489,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { 'codebuff/thinker@1.0.0': childAgent }, @@ -499,7 +507,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { @@ -519,7 +527,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('codebuff/thinker@2.0.0') const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { 'codebuff/thinker@2.0.0': childAgent }, diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index 99a6fae0a..323d93241 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -506,6 +506,7 @@ export async function loopAgentSteps( | 'system' | 'template' | 'toolCallParams' + | 'tools' > & ParamsExcluding & ParamsExcluding< diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 1aee16582..dc2c2c771 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -2,6 +2,7 @@ import { getErrorObject } from '@codebuff/common/util/error' import { assistantMessage } from '@codebuff/common/util/messages' import { cloneDeep } from 'lodash' +import { clearProposedContentForRun } from './tools/handlers/tool/proposed-content-store' import { executeToolCall } from './tools/tool-executor' import { parseTextWithToolCalls } from './util/parse-tool-calls-from-text' @@ -38,6 +39,7 @@ export const runIdToStepAll: Set = new Set() // Function to clear the generator cache for testing purposes export function clearAgentGeneratorCache(params: { logger: Logger }) { for (const key in runIdToGenerator) { + clearProposedContentForRun(key) delete runIdToGenerator[key] } runIdToStepAll.clear() @@ -377,6 +379,7 @@ export async function runProgrammaticStep( if (endTurn) { delete runIdToGenerator[agentState.runId] runIdToStepAll.delete(agentState.runId) + clearProposedContentForRun(agentState.runId) } } } diff --git a/packages/agent-runtime/src/tools/handlers/list.ts b/packages/agent-runtime/src/tools/handlers/list.ts index 4c5fb752c..d75eb829a 100644 --- a/packages/agent-runtime/src/tools/handlers/list.ts +++ b/packages/agent-runtime/src/tools/handlers/list.ts @@ -9,6 +9,8 @@ import { handleFindFiles } from './tool/find-files' import { handleGlob } from './tool/glob' import { handleListDirectory } from './tool/list-directory' import { handleLookupAgentInfo } from './tool/lookup-agent-info' +import { handleProposeStrReplace } from './tool/propose-str-replace' +import { handleProposeWriteFile } from './tool/propose-write-file' import { handleReadDocs } from './tool/read-docs' import { handleReadFiles } from './tool/read-files' import { handleReadSubtree } from './tool/read-subtree' @@ -51,6 +53,8 @@ export const codebuffToolHandlers = { glob: handleGlob, list_directory: handleListDirectory, lookup_agent_info: handleLookupAgentInfo, + propose_str_replace: handleProposeStrReplace, + propose_write_file: handleProposeWriteFile, read_docs: handleReadDocs, read_files: handleReadFiles, read_subtree: handleReadSubtree, diff --git a/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts b/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts new file mode 100644 index 000000000..6c1bd2248 --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts @@ -0,0 +1,108 @@ +import { processStrReplace } from '../../../process-str-replace' +import { + getProposedContent, + setProposedContent, +} from './proposed-content-store' + +import type { CodebuffToolHandlerFunction } from '../handler-function-type' +import type { + CodebuffToolCall, + CodebuffToolOutput, +} from '@codebuff/common/tools/list' +import type { RequestOptionalFileFn } from '@codebuff/common/types/contracts/client' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsExcluding } from '@codebuff/common/types/function-params' +import type { AgentState } from '@codebuff/common/types/session-state' + +export const handleProposeStrReplace = (async ( + params: { + previousToolCallFinished: Promise + toolCall: CodebuffToolCall<'propose_str_replace'> + + logger: Logger + agentState: AgentState + runId: string + + requestOptionalFile: RequestOptionalFileFn + } & ParamsExcluding, +): Promise<{ output: CodebuffToolOutput<'propose_str_replace'> }> => { + const { + previousToolCallFinished, + toolCall, + + logger, + runId, + + requestOptionalFile, + } = params + const { path, replacements } = toolCall.input + + // Get content from proposed state first (by runId), then fall back to disk + const getProposedOrDiskContent = async (): Promise => { + const proposedContent = getProposedContent(runId, path) + if (proposedContent !== undefined) { + return proposedContent + } + return requestOptionalFile({ ...params, filePath: path }) + } + + const latestContentPromise = getProposedOrDiskContent() + + const strReplaceResultPromise = processStrReplace({ + path, + replacements, + initialContentPromise: latestContentPromise, + logger, + }).catch((error: any) => { + logger.error(error, 'Error processing propose_str_replace') + return { + tool: 'str_replace' as const, + path, + error: 'Unknown error: Failed to process the propose_str_replace.', + } + }) + + // Store the proposed content for future propose calls on the same file (by runId) + setProposedContent( + runId, + path, + strReplaceResultPromise.then((result) => + 'content' in result ? result.content : null, + ), + ) + + await previousToolCallFinished + + const strReplaceResult = await strReplaceResultPromise + + if ('error' in strReplaceResult) { + return { + output: [ + { + type: 'json', + value: { + file: path, + errorMessage: strReplaceResult.error, + }, + }, + ], + } + } + + const message = strReplaceResult.messages.length > 0 + ? strReplaceResult.messages.join('\n\n') + : 'Proposed string replacement' + + return { + output: [ + { + type: 'json', + value: { + file: path, + message, + unifiedDiff: strReplaceResult.patch, + }, + }, + ], + } +}) satisfies CodebuffToolHandlerFunction<'propose_str_replace'> diff --git a/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts b/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts new file mode 100644 index 000000000..b20d19ee1 --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts @@ -0,0 +1,87 @@ +import { createPatch } from 'diff' + +import { + getProposedContent, + setProposedContent, +} from './proposed-content-store' + +import type { CodebuffToolHandlerFunction } from '../handler-function-type' +import type { + CodebuffToolCall, + CodebuffToolOutput, +} from '@codebuff/common/tools/list' +import type { RequestOptionalFileFn } from '@codebuff/common/types/contracts/client' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsExcluding } from '@codebuff/common/types/function-params' + +/** + * Proposes writing a file without actually applying the changes. + * Simply overwrites the file exactly with the given content (creating if it doesn't exist). + * Returns a unified diff of the changes for review. + */ +export const handleProposeWriteFile = (async ( + params: { + previousToolCallFinished: Promise + toolCall: CodebuffToolCall<'propose_write_file'> + + logger: Logger + runId: string + + requestOptionalFile: RequestOptionalFileFn + } & ParamsExcluding, +): Promise<{ output: CodebuffToolOutput<'propose_write_file'> }> => { + const { + previousToolCallFinished, + toolCall, + logger, + runId, + requestOptionalFile, + } = params + const { path, content } = toolCall.input + + // Get content from proposed state first (by runId), then fall back to disk + const getProposedOrDiskContent = async (): Promise => { + const proposedContent = getProposedContent(runId, path) + if (proposedContent !== undefined) { + return proposedContent + } + return requestOptionalFile({ ...params, filePath: path }) + } + + const initialContent = await getProposedOrDiskContent() + + // Normalize content (remove leading newline if present) + const newContent = content.startsWith('\n') ? content.slice(1) : content + + // Store the proposed content for future propose calls on the same file (by runId) + setProposedContent(runId, path, Promise.resolve(newContent)) + + await previousToolCallFinished + + // Generate unified diff + const oldContent = initialContent ?? '' + let patch = createPatch(path, oldContent, newContent) + + // Strip the header lines, keep only from @@ onwards + const lines = patch.split('\n') + const hunkStartIndex = lines.findIndex((line) => line.startsWith('@@')) + if (hunkStartIndex !== -1) { + patch = lines.slice(hunkStartIndex).join('\n') + } + + const isNewFile = initialContent === null + const message = isNewFile ? `Proposed new file ${path}` : `Proposed changes to ${path}` + + return { + output: [ + { + type: 'json', + value: { + file: path, + message, + unifiedDiff: patch, + }, + }, + ], + } +}) as CodebuffToolHandlerFunction<'propose_write_file'> diff --git a/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts b/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts new file mode 100644 index 000000000..77310c83e --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts @@ -0,0 +1,64 @@ +/** + * Store for proposed file content by runId. + * This allows propose_str_replace and propose_write_file tools to + * track proposed changes within an agent run, isolated by runId. + */ + +/** Map of runId -> path -> Promise */ +const proposedContentByRunId = new Map< + string, + Record> +>() + +/** + * Get the proposed content map for a specific runId. + * Creates an empty record if none exists. + */ +export function getProposedContentForRun( + runId: string, +): Record> { + let contentByPath = proposedContentByRunId.get(runId) + if (!contentByPath) { + contentByPath = {} + proposedContentByRunId.set(runId, contentByPath) + } + return contentByPath +} + +/** + * Get proposed content for a specific file in a run. + */ +export function getProposedContent( + runId: string, + path: string, +): Promise | undefined { + const contentByPath = proposedContentByRunId.get(runId) + return contentByPath?.[path] +} + +/** + * Set proposed content for a specific file in a run. + */ +export function setProposedContent( + runId: string, + path: string, + content: Promise, +): void { + const contentByPath = getProposedContentForRun(runId) + contentByPath[path] = content +} + +/** + * Clear all proposed content for a specific runId. + * Should be called when an agent run completes. + */ +export function clearProposedContentForRun(runId: string): void { + proposedContentByRunId.delete(runId) +} + +/** + * Clear all proposed content (for testing purposes). + */ +export function clearAllProposedContent(): void { + proposedContentByRunId.clear() +} diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts index 5c7070b9b..7c4f9ce4c 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts @@ -4,6 +4,7 @@ import { logAgentSpawn, executeSubagent, createAgentState, + extractSubagentContextParams, } from './spawn-agent-utils' import type { CodebuffToolHandlerFunction } from '../handler-function-type' @@ -75,9 +76,13 @@ export const handleSpawnAgentInline = (async ( await previousToolCallFinished const { agentTemplate, agentType } = await validateAndGetAgentTemplate({ - ...params, agentTypeStr, parentAgentTemplate, + localAgentTemplates: params.localAgentTemplates, + logger, + fetchAgentFromDatabase: params.fetchAgentFromDatabase, + databaseAgentCache: params.databaseAgentCache, + apiKey: params.apiKey, }) validateAgentInput(agentTemplate, agentType, prompt, spawnParams) @@ -105,7 +110,6 @@ export const handleSpawnAgentInline = (async ( } logAgentSpawn({ - ...params, agentTemplate: inlineTemplate, agentType, agentId: childAgentState.agentId, @@ -113,10 +117,17 @@ export const handleSpawnAgentInline = (async ( prompt, spawnParams, inline: true, + logger, }) + // Extract common context params to avoid bugs from spreading all params + const contextParams = extractSubagentContextParams(params) + const result = await executeSubagent({ - ...params, + ...contextParams, + + // Spawn-specific params + ancestorRunIds: parentAgentState.ancestorRunIds, userInputId: `${userInputId}-inline-${agentType}${childAgentState.agentId}`, prompt: prompt || '', spawnParams, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index 793419521..7f4a43110 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -4,9 +4,13 @@ import { generateCompactId } from '@codebuff/common/util/string' import { loopAgentSteps } from '../../../run-agent-step' import { getAgentTemplate } from '../../../templates/agent-registry' -import { filterUnfinishedToolCalls } from '../../../util/messages' +import { filterUnfinishedToolCalls, withSystemTags } from '../../../util/messages' import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { + AgentRuntimeDeps, + AgentRuntimeScopedDeps, +} from '@codebuff/common/types/contracts/agent-runtime' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsExcluding, @@ -19,6 +23,79 @@ import type { AgentTemplateType, Subgoal, } from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' +import { Message } from '@codebuff/common/types/messages/codebuff-message' + +/** + * Common context params needed for spawning subagents. + * These are the params that don't change between different spawn calls + * and are passed through from the parent agent runtime. + */ +export type SubagentContextParams = AgentRuntimeDeps & + AgentRuntimeScopedDeps & { + clientSessionId: string + fileContext: ProjectFileContext + localAgentTemplates: Record + repoId: string | undefined + repoUrl: string | undefined + signal: AbortSignal + userId: string | undefined + } + +/** + * Extracts the common context params needed for spawning subagents. + * This avoids bugs from spreading all params with `...params` which can + * accidentally pass through params that should be overridden. + */ +export function extractSubagentContextParams( + params: SubagentContextParams, +): SubagentContextParams { + return { + // AgentRuntimeDeps - Environment + clientEnv: params.clientEnv, + ciEnv: params.ciEnv, + // AgentRuntimeDeps - Database + getUserInfoFromApiKey: params.getUserInfoFromApiKey, + fetchAgentFromDatabase: params.fetchAgentFromDatabase, + startAgentRun: params.startAgentRun, + finishAgentRun: params.finishAgentRun, + addAgentStep: params.addAgentStep, + // AgentRuntimeDeps - Billing + consumeCreditsWithFallback: params.consumeCreditsWithFallback, + // AgentRuntimeDeps - LLM + promptAiSdkStream: params.promptAiSdkStream, + promptAiSdk: params.promptAiSdk, + promptAiSdkStructured: params.promptAiSdkStructured, + // AgentRuntimeDeps - Mutable State + databaseAgentCache: params.databaseAgentCache, + liveUserInputRecord: params.liveUserInputRecord, + sessionConnections: params.sessionConnections, + // AgentRuntimeDeps - Analytics + trackEvent: params.trackEvent, + // AgentRuntimeDeps - Other + logger: params.logger, + fetch: params.fetch, + + // AgentRuntimeScopedDeps - Client (WebSocket) + handleStepsLogChunk: params.handleStepsLogChunk, + requestToolCall: params.requestToolCall, + requestMcpToolData: params.requestMcpToolData, + requestFiles: params.requestFiles, + requestOptionalFile: params.requestOptionalFile, + sendAction: params.sendAction, + sendSubagentChunk: params.sendSubagentChunk, + apiKey: params.apiKey, + + // Core context params + clientSessionId: params.clientSessionId, + fileContext: params.fileContext, + localAgentTemplates: params.localAgentTemplates, + repoId: params.repoId, + repoUrl: params.repoUrl, + signal: params.signal, + userId: params.userId, + } +} /** * Checks if a parent agent is allowed to spawn a child agent @@ -166,9 +243,21 @@ export function createAgentState( // When including message history, filter out any tool calls that don't have // corresponding tool responses. This prevents the spawned agent from seeing // unfinished tool calls which throw errors in the Anthropic API. - const messageHistory = agentTemplate.includeMessageHistory - ? filterUnfinishedToolCalls(parentAgentState.messageHistory) - : [] + let messageHistory: Message[] = [] + + if (agentTemplate.includeMessageHistory) { + messageHistory = filterUnfinishedToolCalls(parentAgentState.messageHistory) + messageHistory.push({ + role: 'user', + content: [ + { + type: 'text', + text: withSystemTags(`Subagent ${agentType} has been spawned.`), + }, + ], + tags: ['SUBAGENT_SPAWN'], + }) + } return { agentId, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts index 596a3c064..c80483a07 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts @@ -6,6 +6,7 @@ import { createAgentState, logAgentSpawn, executeSubagent, + extractSubagentContextParams, } from './spawn-agent-utils' import type { CodebuffToolHandlerFunction } from '../handler-function-type' @@ -111,8 +112,14 @@ export const handleSpawnAgents = (async ( logger, }) + // Extract common context params to avoid bugs from spreading all params + const contextParams = extractSubagentContextParams(params) + const result = await executeSubagent({ - ...params, + ...contextParams, + + // Spawn-specific params + ancestorRunIds: parentAgentState.ancestorRunIds, userInputId: `${userInputId}-${agentType}${subAgentState.agentId}`, prompt: prompt || '', spawnParams, @@ -121,6 +128,8 @@ export const handleSpawnAgents = (async ( agentState: subAgentState, fingerprintId, isOnlyChild: agents.length === 1, + excludeToolFromMessageHistory: false, + fromHandleSteps: false, parentSystemPrompt, parentTools: agentTemplate.inheritParentSystemPrompt ? parentTools diff --git a/packages/agent-runtime/src/tools/handlers/tool/write-file.ts b/packages/agent-runtime/src/tools/handlers/tool/write-file.ts index f80382270..dfd6247a8 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/write-file.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/write-file.ts @@ -51,7 +51,7 @@ export function getFileProcessingValues( } for (const [key, value] of Object.entries(state)) { const typedKey = key as keyof typeof fileProcessingValues - if (fileProcessingValues[typedKey] !== undefined) { + if (typedKey in fileProcessingValues) { fileProcessingValues[typedKey] = value as any } } diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 4c0f03f97..00cb52de9 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -30,13 +30,6 @@ import type { import type { PrintModeEvent } from '@codebuff/common/types/print-mode' import type { Subgoal } from '@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' -import type { ToolCallPart } from 'ai' - -export type ToolCallError = { - toolName?: string - args: Record - error: string -} & Omit export async function processStream( params: { diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 30106cb99..05757d2c1 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -54,51 +54,21 @@ export function parseRawToolCall(params: { toolCallId: string input: Record } - autoInsertEndStepParam?: boolean }): CodebuffToolCall | ToolCallError { - const { rawToolCall, autoInsertEndStepParam = false } = params + const { rawToolCall } = params const toolName = rawToolCall.toolName - if (!(toolName in toolParams)) { - return { - toolName, - toolCallId: rawToolCall.toolCallId, - input: rawToolCall.input, - error: `Tool ${toolName} not found`, - } - } - const validName = toolName as T - - // const processedParameters: Record = {} - // for (const [param, val] of Object.entries(rawToolCall.input ?? {})) { - // processedParameters[param] = val - // } - - // Add the required codebuff_end_step parameter with the correct value for this tool if requested - // if (autoInsertEndStepParam) { - // processedParameters[endsAgentStepParam] = - // toolParams[validName].endsAgentStep - // } - - // const paramsSchema = toolParams[validName].endsAgentStep - // ? ( - // toolParams[validName].inputSchema satisfies z.ZodObject as z.ZodObject - // ).extend({ - // [endsAgentStepParam]: z.literal(toolParams[validName].endsAgentStep), - // }) - // : toolParams[validName].inputSchema - const processedParameters = rawToolCall.input - const paramsSchema = toolParams[validName].inputSchema + const paramsSchema = toolParams[toolName].inputSchema const result = paramsSchema.safeParse(processedParameters) if (!result.success) { return { - toolName: validName, + toolName, toolCallId: rawToolCall.toolCallId, input: rawToolCall.input, - error: `Invalid parameters for ${validName}: ${JSON.stringify( + error: `Invalid parameters for ${toolName}: ${JSON.stringify( result.error.issues, null, 2, @@ -111,7 +81,7 @@ export function parseRawToolCall(params: { } return { - toolName: validName, + toolName, input: result.data, toolCallId: rawToolCall.toolCallId, } as CodebuffToolCall @@ -163,7 +133,6 @@ export function executeToolCall( const { toolName, input, - autoInsertEndStepParam = false, excludeToolFromMessageHistory = false, fromHandleSteps = false, @@ -188,8 +157,24 @@ export function executeToolCall( toolCallId, input, }, - autoInsertEndStepParam, }) + + // Filter out restricted tools - emit error instead of tool call/result + // This prevents the CLI from showing tool calls that the agent doesn't have permission to use + if ( + toolCall.toolName && + !agentTemplate.toolNames.includes(toolCall.toolName) && + !fromHandleSteps + ) { + // Emit an error event instead of tool call/result pair + // The stream parser will convert this to a user message for proper API compliance + onResponseChunk({ + type: 'error', + message: `Tool \`${toolName}\` is not currently available. Make sure to only use tools provided at the start of the conversation AND that you most recently have permission to use.`, + }) + return previousToolCallFinished + } + if ('error' in toolCall) { const toolResult: ToolMessage = { role: 'tool', @@ -208,21 +193,6 @@ export function executeToolCall( return previousToolCallFinished } - // Filter out restricted tools - emit error instead of tool call/result - // This prevents the CLI from showing tool calls that the agent doesn't have permission to use - if ( - !agentTemplate.toolNames.includes(toolCall.toolName) && - !fromHandleSteps - ) { - // Emit an error event instead of tool call/result pair - // The stream parser will convert this to a user message for proper API compliance - onResponseChunk({ - type: 'error', - message: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`, - }) - return previousToolCallFinished - } - // Only emit tool_call event after permission check passes onResponseChunk({ type: 'tool_call', @@ -399,27 +369,11 @@ export async function executeCustomToolCall( }, autoInsertEndStepParam, }) - if ('error' in toolCall) { - const toolResult: ToolMessage = { - role: 'tool', - toolName, - toolCallId: toolCall.toolCallId, - content: jsonToolResult({ - errorMessage: toolCall.error, - }), - } - toolResults.push(cloneDeep(toolResult)) - toolResultsToAddAfterStream.push(cloneDeep(toolResult)) - logger.debug( - { toolCall, error: toolCall.error }, - `${toolName} error: ${toolCall.error}`, - ) - return previousToolCallFinished - } // Filter out restricted tools - emit error instead of tool call/result // This prevents the CLI from showing tool calls that the agent doesn't have permission to use if ( + toolCall.toolName && !(agentTemplate.toolNames as string[]).includes(toolCall.toolName) && !fromHandleSteps && !( @@ -436,6 +390,24 @@ export async function executeCustomToolCall( return previousToolCallFinished } + if ('error' in toolCall) { + const toolResult: ToolMessage = { + role: 'tool', + toolName, + toolCallId: toolCall.toolCallId, + content: jsonToolResult({ + errorMessage: toolCall.error, + }), + } + toolResults.push(cloneDeep(toolResult)) + toolResultsToAddAfterStream.push(cloneDeep(toolResult)) + logger.debug( + { toolCall, error: toolCall.error }, + `${toolName} error: ${toolCall.error}`, + ) + return previousToolCallFinished + } + // Only emit tool_call event after permission check passes onResponseChunk({ type: 'tool_call', diff --git a/sdk/src/run.ts b/sdk/src/run.ts index 0d3d3039a..7e0b166cf 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -833,7 +833,6 @@ export async function runOnce({ clientSessionId: promptId, userId, signal: signal ?? new AbortController().signal, - tools: {}, }).catch((error) => { // Let retryable errors and PaymentRequiredError propagate so the retry wrapper can handle them const isRetryable = isRetryableError(error)