Update editor-best-of-n-max and add an integration test (not fully working yet)

jahooma · jahooma · commit 8a1ba97cdf60 · 2025-11-26T13:02:16.000-08:00
diff --git a/.agents/__tests__/editor-best-of-n.integration.test.ts b/.agents/__tests__/editor-best-of-n.integration.test.ts
@@ -0,0 +1,91 @@
+import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
+import { describe, expect, it } from 'bun:test'
+
+import { CodebuffClient } from '@codebuff/sdk'
+
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+
+/**
+ * Integration tests for the editor-best-of-n-max agent.
+ * These tests verify that the best-of-n editor workflow works correctly:
+ * 1. Spawns multiple implementor agents in parallel
+ * 2. Collects their implementation proposals
+ * 3. Uses a selector agent to choose the best implementation
+ * 4. Applies the chosen implementation
+ */
+describe('Editor Best-of-N Max Agent Integration', () => {
+  it(
+    'should generate and select the best implementation for a simple edit',
+    async () => {
+      const apiKey = process.env[API_KEY_ENV_VAR]
+      if (!apiKey) {
+        throw new Error('API key not found')
+      }
+
+      // Create mock project files with a simple TypeScript file to edit
+      const projectFiles: Record<string, string> = {
+        'src/utils/math.ts': `
+export function add(a: number, b: number): number {
+  return a + b
+}
+
+export function subtract(a: number, b: number): number {
+  return a - b
+}
+`,
+        'src/index.ts': `
+import { add, subtract } from './utils/math'
+
+console.log(add(1, 2))
+console.log(subtract(5, 3))
+`,
+        'package.json': JSON.stringify({
+          name: 'test-project',
+          version: '1.0.0',
+          dependencies: {},
+        }),
+      }
+
+      const client = new CodebuffClient({
+        apiKey,
+        cwd: '/tmp/test-best-of-n-project',
+        projectFiles,
+      })
+
+      const events: PrintModeEvent[] = []
+
+      // Run the editor-best-of-n-max agent with a simple task
+      // Using n=2 to keep the test fast while still testing the best-of-n workflow
+      const run = await client.run({
+        agent: 'editor-best-of-n-max',
+        prompt:
+          'Add a multiply function to src/utils/math.ts that takes two numbers and returns their product',
+        params: { n: 2 },
+        handleEvent: (event) => {
+          console.log(event)
+          events.push(event)
+        },
+      })
+
+      // The output should not be an error
+      expect(run.output.type).not.toEqual('error')
+
+      // Verify we got some output
+      expect(run.output).toBeDefined()
+
+      // The output should contain the implementation response
+      const outputStr =
+        typeof run.output === 'string' ? run.output : JSON.stringify(run.output)
+      console.log('Output:', outputStr)
+
+      // Should contain evidence of the multiply function being added
+      const relevantTerms = ['multiply', 'product', 'str_replace', 'write_file']
+      const foundRelevantTerm = relevantTerms.some((term) =>
+        outputStr.toLowerCase().includes(term.toLowerCase()),
+      )
+
+      expect(foundRelevantTerm).toBe(true)
+    },
+    { timeout: 120_000 }, // 2 minute timeout for best-of-n workflow
+  )
+})
diff --git a/.agents/editor/best-of-n/editor-best-of-n.ts b/.agents/editor/best-of-n/editor-best-of-n.ts
@@ -39,11 +39,11 @@ export function createBestOfNEditor(
     spawnableAgents: buildArray(
       'best-of-n-selector',
       'best-of-n-selector-opus',
-      isDefault && 'best-of-n-selector-gemini',
+      'best-of-n-selector-gemini',
       'editor-implementor',
       'editor-implementor-opus',
-      isDefault && 'editor-implementor-gemini',
-      isMax && 'editor-implementor-gpt-5',
+      'editor-implementor-gemini',
+      'editor-implementor-gpt-5',
     ),
 
     inputSchema: {
@@ -230,6 +230,7 @@ function* handleStepsDefault({
 }
 function* handleStepsMax({
   params,
+  logger,
 }: AgentStepContext): ReturnType<
   NonNullable<SecretAgentDefinition['handleSteps']>
 > {
@@ -269,8 +270,9 @@ function* handleStepsMax({
   } satisfies ToolCall<'spawn_agents'>
 
   // Extract spawn results
-  const spawnedImplementations =
-    extractSpawnResults<{ text: string }[]>(implementorResults)
+  const spawnedImplementations = extractSpawnResults(
+    implementorResults,
+  ) as any[]
 
   // Extract all the plans from the structured outputs
   const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@@ -280,9 +282,14 @@ function* handleStepsMax({
     content:
       'errorMessage' in result
         ? `Error: ${result.errorMessage}`
-        : result[0].text,
+        : extractLastMessageText(result),
   }))
 
+  logger.info(
+    { spawnedImplementations, implementations },
+    'spawnedImplementations',
+  )
+
   // Spawn selector with implementations as params
   const { toolResult: selectorResult } = yield {
     toolName: 'spawn_agents',
@@ -321,15 +328,9 @@ function* handleStepsMax({
     return
   }
 
-  // Apply the chosen implementation using STEP_TEXT (only tool calls, no commentary)
-  const toolCallsOnly = extractToolCallsOnly(
-    typeof chosenImplementation.content === 'string'
-      ? chosenImplementation.content
-      : '',
-  )
   const { agentState: postEditsAgentState } = yield {
     type: 'STEP_TEXT',
-    text: toolCallsOnly,
+    text: chosenImplementation.content,
   } as StepText
   const { messageHistory } = postEditsAgentState
   const lastAssistantMessageIndex = messageHistory.findLastIndex(
@@ -352,37 +353,60 @@ function* handleStepsMax({
     includeToolCall: false,
   } satisfies ToolCall<'set_output'>
 
-  function extractSpawnResults<T>(
-    results: any[] | undefined,
-  ): (T | { errorMessage: string })[] {
-    if (!results) return []
-    const spawnedResults = results
-      .filter((result) => result.type === 'json')
-      .map((result) => result.value)
-      .flat() as {
-      agentType: string
-      value: { value?: T; errorMessage?: string }
-    }[]
-    return spawnedResults.map(
-      (result) =>
-        result.value.value ?? {
-          errorMessage:
-            result.value.errorMessage ?? 'Error extracting spawn results',
-        },
-    )
+  /**
+   * Extracts the array of subagent results from spawn_agents tool output.
+   *
+   * The spawn_agents tool result structure is:
+   * [{ type: 'json', value: [{ agentName, agentType, value: AgentOutput }] }]
+   *
+   * Returns an array of agent outputs, omitting any spawned result whose value is falsy.
+   */
+  function extractSpawnResults<T>(results: any[] | undefined): T[] {
+    if (!results || results.length === 0) return []
+
+    // Use the first 'json' entry, which holds the spawn results
+    const jsonResult = results.find((r) => r.type === 'json')
+    if (!jsonResult?.value) return []
+
+    // Get the spawned agent results array
+    const spawnedResults = Array.isArray(jsonResult.value)
+      ? jsonResult.value
+      : [jsonResult.value]
+
+    // Extract the value (AgentOutput) from each result
+    return spawnedResults.map((result: any) => result?.value).filter(Boolean)
   }
 
-  // Extract only tool calls from text, removing any commentary
-  function extractToolCallsOnly(text: string): string {
-    const toolExtractionPattern =
-      /<codebuff_tool_call>\n(.*?)\n<\/codebuff_tool_call>/gs
-    const matches: string[] = []
-
-    for (const match of text.matchAll(toolExtractionPattern)) {
-      matches.push(match[0]) // Include the full tool call with tags
+  /**
+   * Extracts the text content from a 'lastMessage' AgentOutput.
+   *
+   * For agents with outputMode: 'last_message', the output structure is:
+   * { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
+   *
+   * Returns the first text part of the most recent assistant message that has one, or null.
+   */
+  function extractLastMessageText(agentOutput: any): string | null {
+    if (!agentOutput) return null
+
+    // Handle 'lastMessage' output mode - the value contains an array of messages
+    if (
+      agentOutput.type === 'lastMessage' &&
+      Array.isArray(agentOutput.value)
+    ) {
+      // Find the last assistant message with text content
+      for (let i = agentOutput.value.length - 1; i >= 0; i--) {
+        const message = agentOutput.value[i]
+        if (message.role === 'assistant' && Array.isArray(message.content)) {
+          // Return the first text part in the message
+          for (const part of message.content) {
+            if (part.type === 'text' && typeof part.text === 'string') {
+              return part.text
+            }
+          }
+        }
+      }
     }
-
-    return matches.join('\n')
+    return null
   }
 }
 
diff --git a/.agents/editor/best-of-n/editor-implementor.ts b/.agents/editor/best-of-n/editor-implementor.ts
@@ -37,7 +37,7 @@ export const createBestOfNImplementor = (options: {
     
 Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
 
-Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, or spawn agents.
+Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, or spawn agents. Do not call any of these tools!
 
 Write out what changes you would make using the tool call format below. Use this exact format for each file change: