Skip to content

Commit 8a1ba97

Browse files
committed
update editor best of n max + add unit tests (not fully working yet tho)
1 parent a23a698 commit 8a1ba97

File tree

3 files changed

+157
-42
lines changed

3 files changed

+157
-42
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
2+
import { describe, expect, it } from 'bun:test'
3+
4+
import { CodebuffClient } from '@codebuff/sdk'
5+
6+
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
7+
/**
 * Integration tests for the editor-best-of-n-max agent.
 *
 * The best-of-n editor workflow under test:
 * 1. Spawns multiple implementor agents in parallel
 * 2. Collects their implementation proposals
 * 3. Uses a selector agent to choose the best implementation
 * 4. Applies the chosen implementation
 *
 * This test talks to the real service: it needs the API key named by
 * API_KEY_ENV_VAR in the environment and fails fast when it is missing.
 */
describe('Editor Best-of-N Max Agent Integration', () => {
  it(
    'should generate and select the best implementation for a simple edit',
    async () => {
      const apiKey = process.env[API_KEY_ENV_VAR]
      if (!apiKey) {
        throw new Error('API key not found')
      }

      // Mock project: one small TypeScript module to edit, plus its consumer
      const projectFiles: Record<string, string> = {
        'src/utils/math.ts': `
export function add(a: number, b: number): number {
  return a + b
}

export function subtract(a: number, b: number): number {
  return a - b
}
`,
        'src/index.ts': `
import { add, subtract } from './utils/math'

console.log(add(1, 2))
console.log(subtract(5, 3))
`,
        'package.json': JSON.stringify({
          name: 'test-project',
          version: '1.0.0',
          dependencies: {},
        }),
      }

      const client = new CodebuffClient({
        apiKey,
        cwd: '/tmp/test-best-of-n-project',
        projectFiles,
      })

      // Every streamed event is kept (and echoed) to help debug failing runs
      const events: PrintModeEvent[] = []

      // Run the editor-best-of-n-max agent with a simple task.
      // n=2 keeps the test fast while still exercising the best-of-n workflow.
      const run = await client.run({
        agent: 'editor-best-of-n-max',
        prompt:
          'Add a multiply function to src/utils/math.ts that takes two numbers and returns their product',
        params: { n: 2 },
        handleEvent: (event) => {
          console.log(event)
          events.push(event)
        },
      })

      // Check that output exists BEFORE reading its fields; otherwise a
      // missing output surfaces as a TypeError rather than a failed assertion.
      expect(run.output).toBeDefined()

      // The output should not be an error
      expect(run.output.type).not.toEqual('error')

      // Flatten the output to a string so it can be searched for evidence
      const outputStr =
        typeof run.output === 'string' ? run.output : JSON.stringify(run.output)
      console.log('Output:', outputStr)

      // Should contain evidence of the multiply function being added
      // (terms are already lowercase; only the haystack needs normalizing)
      const relevantTerms = ['multiply', 'product', 'str_replace', 'write_file']
      const normalizedOutput = outputStr.toLowerCase()
      const foundRelevantTerm = relevantTerms.some((term) =>
        normalizedOutput.includes(term),
      )

      expect(foundRelevantTerm).toBe(true)
    },
    { timeout: 120_000 }, // 2 minute timeout for best-of-n workflow
  )
})

.agents/editor/best-of-n/editor-best-of-n.ts

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ export function createBestOfNEditor(
3939
spawnableAgents: buildArray(
4040
'best-of-n-selector',
4141
'best-of-n-selector-opus',
42-
isDefault && 'best-of-n-selector-gemini',
42+
'best-of-n-selector-gemini',
4343
'editor-implementor',
4444
'editor-implementor-opus',
45-
isDefault && 'editor-implementor-gemini',
46-
isMax && 'editor-implementor-gpt-5',
45+
'editor-implementor-gemini',
46+
'editor-implementor-gpt-5',
4747
),
4848

4949
inputSchema: {
@@ -230,6 +230,7 @@ function* handleStepsDefault({
230230
}
231231
function* handleStepsMax({
232232
params,
233+
logger,
233234
}: AgentStepContext): ReturnType<
234235
NonNullable<SecretAgentDefinition['handleSteps']>
235236
> {
@@ -269,8 +270,9 @@ function* handleStepsMax({
269270
} satisfies ToolCall<'spawn_agents'>
270271

271272
// Extract spawn results
272-
const spawnedImplementations =
273-
extractSpawnResults<{ text: string }[]>(implementorResults)
273+
const spawnedImplementations = extractSpawnResults(
274+
implementorResults,
275+
) as any[]
274276

275277
// Extract all the plans from the structured outputs
276278
const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@@ -280,9 +282,14 @@ function* handleStepsMax({
280282
content:
281283
'errorMessage' in result
282284
? `Error: ${result.errorMessage}`
283-
: result[0].text,
285+
: extractLastMessageText(result),
284286
}))
285287

288+
logger.info(
289+
{ spawnedImplementations, implementations },
290+
'spawnedImplementations',
291+
)
292+
286293
// Spawn selector with implementations as params
287294
const { toolResult: selectorResult } = yield {
288295
toolName: 'spawn_agents',
@@ -321,15 +328,9 @@ function* handleStepsMax({
321328
return
322329
}
323330

324-
// Apply the chosen implementation using STEP_TEXT (only tool calls, no commentary)
325-
const toolCallsOnly = extractToolCallsOnly(
326-
typeof chosenImplementation.content === 'string'
327-
? chosenImplementation.content
328-
: '',
329-
)
330331
const { agentState: postEditsAgentState } = yield {
331332
type: 'STEP_TEXT',
332-
text: toolCallsOnly,
333+
text: chosenImplementation.content,
333334
} as StepText
334335
const { messageHistory } = postEditsAgentState
335336
const lastAssistantMessageIndex = messageHistory.findLastIndex(
@@ -352,37 +353,60 @@ function* handleStepsMax({
352353
includeToolCall: false,
353354
} satisfies ToolCall<'set_output'>
354355

355-
function extractSpawnResults<T>(
356-
results: any[] | undefined,
357-
): (T | { errorMessage: string })[] {
358-
if (!results) return []
359-
const spawnedResults = results
360-
.filter((result) => result.type === 'json')
361-
.map((result) => result.value)
362-
.flat() as {
363-
agentType: string
364-
value: { value?: T; errorMessage?: string }
365-
}[]
366-
return spawnedResults.map(
367-
(result) =>
368-
result.value.value ?? {
369-
errorMessage:
370-
result.value.errorMessage ?? 'Error extracting spawn results',
371-
},
372-
)
/**
 * Extracts the array of subagent results from spawn_agents tool output.
 *
 * The spawn_agents tool result structure is:
 *   [{ type: 'json', value: [{ agentName, agentType, value: AgentOutput }] }]
 *
 * Only the first `json` entry is inspected. Its `value` may be a single
 * object instead of an array, in which case it is treated as a one-element
 * list. Spawned entries without a truthy `value` are dropped.
 *
 * @param results Raw tool result entries; `undefined` or empty yields `[]`.
 * @returns The `value` (AgentOutput) of each spawned agent, in order.
 */
function extractSpawnResults<T>(results: any[] | undefined): T[] {
  if (!Array.isArray(results) || results.length === 0) return []

  // Optional chaining: a null/undefined entry must be skipped, not crash the step
  const jsonResult = results.find((entry) => entry?.type === 'json')
  if (!jsonResult?.value) return []

  // Normalize to an array of spawned agent results
  const spawnedResults: any[] = Array.isArray(jsonResult.value)
    ? jsonResult.value
    : [jsonResult.value]

  // Pull the AgentOutput out of each result, discarding missing ones
  return spawnedResults.map((entry) => entry?.value).filter(Boolean)
}
374379

375-
// Extract only tool calls from text, removing any commentary
376-
function extractToolCallsOnly(text: string): string {
377-
const toolExtractionPattern =
378-
/<codebuff_tool_call>\n(.*?)\n<\/codebuff_tool_call>/gs
379-
const matches: string[] = []
380-
381-
for (const match of text.matchAll(toolExtractionPattern)) {
382-
matches.push(match[0]) // Include the full tool call with tags
/**
 * Extracts the text content from a 'lastMessage' AgentOutput.
 *
 * For agents with outputMode: 'last_message', the output structure is:
 *   { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
 *
 * Messages are scanned from last to first. The first `text` part of the
 * latest assistant message that has one is returned; assistant messages
 * without any text part (or with non-array content) are skipped.
 *
 * @param agentOutput Output object of a spawned agent (any shape tolerated).
 * @returns The text found, or null when the output is not a 'lastMessage'
 *   output or contains no assistant text.
 */
function extractLastMessageText(agentOutput: any): string | null {
  if (!agentOutput) return null

  // Only the 'lastMessage' output mode carries an array of messages
  if (agentOutput.type !== 'lastMessage' || !Array.isArray(agentOutput.value)) {
    return null
  }

  const messages: any[] = agentOutput.value
  for (let index = messages.length - 1; index >= 0; index--) {
    const message = messages[index]
    // `?.` guards: malformed (null) entries are skipped instead of throwing
    if (message?.role !== 'assistant' || !Array.isArray(message.content)) {
      continue
    }

    for (const part of message.content) {
      if (part?.type === 'text' && typeof part.text === 'string') {
        return part.text
      }
    }
  }

  return null
}
387411
}
388412

.agents/editor/best-of-n/editor-implementor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export const createBestOfNImplementor = (options: {
3737
3838
Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
3939
40-
Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, or spawn agents.
40+
Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, or spawn agents. Do not call any of these tools!
4141
4242
Write out what changes you would make using the tool call format below. Use this exact format for each file change:
4343

0 commit comments

Comments
 (0)