Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
c3fca97
In messageHistory, add a message to introduce a spawned agent
jahooma Dec 11, 2025
a83e5ae
gpt-5.2 in editor, editor-implementor
jahooma Dec 11, 2025
f62ed7d
Merge branch 'main' into gpt-5.2
jahooma Dec 12, 2025
eb5a4dc
max: ask questions if needed
jahooma Dec 13, 2025
0afcef9
Update max agent steps
jahooma Dec 13, 2025
11a15cf
Stop dev from killing services
jahooma Dec 13, 2025
da353f5
Merge branch 'main' into gpt-5.2
jahooma Dec 13, 2025
03c1d9c
Merge branch 'main' into gpt-5.2
jahooma Dec 13, 2025
eacc976
Propose str_replace / write_file
jahooma Dec 13, 2025
98a6b70
Fix bug where exclude tool from message history propagated to subagents
jahooma Dec 13, 2025
9ba0254
editor-multi-prompt2
jahooma Dec 14, 2025
be03a3e
Fix tests to account for subagent spawn message
jahooma Dec 14, 2025
0bfce45
fix editor-multiprompt2
jahooma Dec 14, 2025
9c32bce
fix showing correct model of editor
jahooma Dec 14, 2025
bfa0002
Show gpt 5.2
jahooma Dec 14, 2025
ed1d539
Move tool call allowed check earlier
jahooma Dec 14, 2025
67873a6
Refactor spawn agents to not just pass all params through. Passes all…
jahooma Dec 14, 2025
2166ccb
tweaks
jahooma Dec 14, 2025
4fde2f1
fix tests
jahooma Dec 14, 2025
8a04ff4
Fix
jahooma Dec 14, 2025
df6f385
selector: include suggested improvements!
jahooma Dec 14, 2025
d9c4168
tweak editor implementor
jahooma Dec 14, 2025
e60492f
fix types
jahooma Dec 14, 2025
ebd7391
Merge branch 'main' into gpt-5.2
jahooma Dec 14, 2025
68a447f
Fix tests (by deleting them haha)
jahooma Dec 14, 2025
fd64ac8
Delete some dead code (tool call parse error)
jahooma Dec 14, 2025
b906ea6
Merge branch 'main' into gpt-5.2
jahooma Dec 15, 2025
da01d2b
cleanup/fixes
jahooma Dec 15, 2025
de7b804
delete log
jahooma Dec 15, 2025
9cbf352
fix bug
jahooma Dec 15, 2025
ef404ef
Spawn gpt-5 first so that selector considers it more
jahooma Dec 15, 2025
d6822bc
Store the proposed files per runId
jahooma Dec 15, 2025
7a78abf
strengthen prompt for multi prompt editor
jahooma Dec 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions .agents/base2/base2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ export function createBase2(
!isFast && 'suggest_followups',
'str_replace',
'write_file',
'propose_str_replace',
'propose_write_file',
'ask_user',
'set_output',
),
Expand All @@ -68,7 +70,7 @@ export function createBase2(
isDefault && 'thinker',
isLite && 'editor-gpt-5',
isDefault && 'editor',
isMax && 'editor-multi-prompt',
isMax && 'editor-multi-prompt2',
isMax && 'thinker-best-of-n-opus',
!isLite && 'code-reviewer',
'context-pruner',
Expand Down Expand Up @@ -127,7 +129,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
(isDefault || isMax) &&
`- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem.`,
isMax &&
`- Spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
`- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
'- Spawn commanders sequentially if the second command depends on the the first.',
!isFast &&
!isLite &&
Expand Down Expand Up @@ -181,7 +183,7 @@ ${
? '[ You implement the changes using the str_replace or write_file tools ]'
: isLite
? '[ You implement the changes using the editor-gpt-5 agent ]'
: '[ You implement the changes using the editor-multi-prompt agent ]'
: '[ You implement the changes using the editor-multi-prompt2 agent ]'
}

${
Expand Down Expand Up @@ -291,6 +293,8 @@ ${buildArray(
EXPLORE_PROMPT,
isMax &&
`- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`,
isMax &&
'If needed, use the ask_user tool to ask the user for clarification on their request or alternate implementation strategies. It is good to get context on the codebase before asking questions so you can ask informed questions.',
(isDefault || isMax) &&
`- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`,
isDefault &&
Expand All @@ -300,7 +304,7 @@ ${buildArray(
isDefault &&
'- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.',
isMax &&
`- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`,
`- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`,
isFast &&
'- Implement the changes using the str_replace or write_file tools. Implement all the changes in one go.',
isFast &&
Expand Down Expand Up @@ -334,7 +338,7 @@ function buildImplementationStepPrompt({
isMax &&
`Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
isMax &&
`You must spawn the 'editor-multi-prompt' agent to implement code changes, since it will generate the best code changes.`,
`You must spawn the 'editor-multi-prompt2' agent to implement code changes, since it will generate the best code changes.`,
(isDefault || isMax) &&
'Spawn code-reviewer to review the changes after you have implemented the changes and in parallel with typechecking or testing.',
`After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''} Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`,
Expand Down
6 changes: 6 additions & 0 deletions .agents/context-pruner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ const definition: AgentDefinition = {
if (lastInstructionsPromptIndex !== -1) {
currentMessages.splice(lastInstructionsPromptIndex, 1)
}
const lastSubagentSpawnIndex = currentMessages.findLastIndex((message) =>
message.tags?.includes('SUBAGENT_SPAWN'),
)
if (lastSubagentSpawnIndex !== -1) {
currentMessages.splice(lastSubagentSpawnIndex, 1)
}

// Initial check - if already under limit, return
const initialTokens = countMessagesTokens(currentMessages)
Expand Down
144 changes: 144 additions & 0 deletions .agents/editor/best-of-n/best-of-n-selector2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import {
PLACEHOLDER,
type SecretAgentDefinition,
} from '../../types/secret-agent-definition'
import { publisher } from '../../constants'

/**
 * Builds the definition (everything except `id`) of the best-of-n selector
 * agent: it receives several candidate implementations as unified diffs via
 * `params.implementations` and reports, through `set_output`, which one to
 * use plus improvement ideas.
 *
 * @param options.model Which backing model to use. `'sonnet'` and `'opus'`
 *   map to the Anthropic 4.5 models; any other value (i.e. `'gpt-5'`) maps to
 *   `openai/gpt-5.2`. Only the `'gpt-5'` variant sets `reasoningOptions`
 *   (effort `'high'`); the Sonnet/Opus variants are instead prompted to
 *   reason inside `<think>` tags.
 * @returns An agent definition without an `id`; callers add the id.
 */
export const createBestOfNSelector2 = (options: {
  model: 'sonnet' | 'opus' | 'gpt-5'
}): Omit<SecretAgentDefinition, 'id'> => {
  const { model } = options
  const isSonnet = model === 'sonnet'
  const isOpus = model === 'opus'
  const isGpt5 = model === 'gpt-5'
  return {
    publisher,
    model: isSonnet
      ? 'anthropic/claude-sonnet-4.5'
      : isOpus
        ? 'anthropic/claude-opus-4.5'
        : 'openai/gpt-5.2',
    // Spreading `false` adds nothing, so reasoningOptions is only present for gpt-5.
    ...(isGpt5 && {
      reasoningOptions: {
        effort: 'high',
      },
    }),
    displayName: isGpt5
      ? 'Best-of-N GPT-5 Diff Selector'
      : isOpus
        ? 'Best-of-N Opus Diff Selector'
        : 'Best-of-N Sonnet Diff Selector',
    spawnerPrompt:
      'Analyzes multiple implementation proposals (as unified diffs) and selects the best one',

    includeMessageHistory: true,
    inheritParentSystemPrompt: true,

    toolNames: ['set_output'],
    spawnableAgents: [],

    inputSchema: {
      params: {
        type: 'object',
        properties: {
          implementations: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                id: { type: 'string' },
                strategy: { type: 'string' },
                content: {
                  type: 'string',
                  description: 'Unified diff of the proposed changes',
                },
              },
              required: ['id', 'content'],
            },
          },
        },
        required: ['implementations'],
      },
    },
    outputMode: 'structured_output',
    outputSchema: {
      type: 'object',
      properties: {
        implementationId: {
          type: 'string',
          description: 'The id of the chosen implementation',
        },
        reason: {
          type: 'string',
          description:
            'An extremely short (1 sentence) description of why this implementation was chosen',
        },
        // NOTE: this is a single string, not an array; the prompt below must agree.
        suggestedImprovements: {
          type: 'string',
          description:
            'A summary of suggested improvements from non-chosen implementations that could enhance the selected implementation. You can also include any new ideas you have to improve upon the selected implementation. Leave empty if no valuable improvements were found.',
        },
      },
      required: ['implementationId', 'reason', 'suggestedImprovements'],
    },

    instructionsPrompt: `As part of the best-of-n workflow of agents, you are the implementation selector agent.

## Task Instructions

You have been provided with multiple implementation proposals via params. Each implementation shows a UNIFIED DIFF of the proposed changes.

The implementations are available in the params.implementations array, where each has:
- id: A unique identifier for the implementation (A, B, C, etc.)
- strategy: The strategy/approach used for this implementation
- content: The unified diff showing what would change

Your task is to:
1. Analyze each implementation's diff carefully, compare them against the original user requirements
2. Select the best implementation
3. Identify the best ideas/techniques from the NON-CHOSEN implementations that could improve the selected implementation

Evaluate each based on (in order of importance):
- Correctness and completeness in fulfilling the user's request
- Simplicity and maintainability
- Code quality and adherence to project conventions
- Proper reuse of existing code (helper functions, libraries, etc.)
- Minimal changes to existing code (fewer files changed, fewer lines changed)
- Clarity and readability

## Analyzing Non-Chosen Implementations

After selecting the best implementation, look at each non-chosen implementation and identify any valuable aspects that could enhance the selected implementation. These might include:
- More elegant code patterns or abstractions
- Simplified logic or reuse of existing code
- Additional edge case handling
- Better naming or organization
- Useful comments or documentation
- Additional features that align with the user's request

Only include improvements that are genuinely valuable and compatible with the selected implementation. If a non-chosen implementation has no useful improvements to offer, don't include it.

## User Request

For context, here is the original user request again:
<user_message>
${PLACEHOLDER.USER_INPUT_PROMPT}
</user_message>

Try to select an implementation that fulfills all the requirements in the user's request.

## Response Format

${
  isSonnet || isOpus
    ? `Use <think> tags to write out your thoughts about the implementations as needed to pick the best implementation. IMPORTANT: You should think really really hard to make sure you pick the absolute best implementation! Also analyze the non-chosen implementations for any valuable techniques or approaches that could improve the selected one.

Then, do not write any other explanations AT ALL. You should directly output a single tool call to set_output with the selected implementationId, short reason, and suggestedImprovements string.`
    : `Output a single tool call to set_output with the selected implementationId, reason, and suggestedImprovements. Do not write anything else.`
}`,
  }
}

// Default selector agent: the Opus-backed variant, registered under the id
// 'best-of-n-selector2'.
const opusSelectorBase = createBestOfNSelector2({ model: 'opus' })
const definition: SecretAgentDefinition = {
  ...opusSelectorBase,
  id: 'best-of-n-selector2',
}

export default definition
2 changes: 1 addition & 1 deletion .agents/editor/best-of-n/editor-implementor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export const createBestOfNImplementor = (options: {
? 'anthropic/claude-opus-4.5'
: isGemini
? 'google/gemini-3-pro-preview'
: 'openai/gpt-5.1',
: 'openai/gpt-5.2',
displayName: 'Implementation Generator',
spawnerPrompt:
'Generates a complete implementation plan with all code changes',
Expand Down
7 changes: 7 additions & 0 deletions .agents/editor/best-of-n/editor-implementor2-gpt-5.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { createBestOfNImplementor2 } from './editor-implementor2'

// GPT-5 flavour of the v2 implementor, registered under the id
// 'editor-implementor2-gpt-5'.
const gpt5ImplementorBase = createBestOfNImplementor2({ model: 'gpt-5' })
const definition = {
  ...gpt5ImplementorBase,
  id: 'editor-implementor2-gpt-5',
}
export default definition
156 changes: 156 additions & 0 deletions .agents/editor/best-of-n/editor-implementor2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import { publisher } from '../../constants'

import type { SecretAgentDefinition } from '../../types/secret-agent-definition'

/**
 * Builds the definition (everything except `id`) of a v2 implementation
 * generator agent. The agent is restricted to the `propose_write_file` and
 * `propose_str_replace` tools; once it finishes stepping, `handleSteps`
 * gathers the tool calls it made and the JSON tool results it received and
 * emits them via `set_output`.
 *
 * @param options.model `'gpt-5'` selects `openai/gpt-5.2`, `'opus'` selects
 *   `anthropic/claude-opus-4.5`, anything else selects
 *   `anthropic/claude-sonnet-4.5`.
 * @returns An agent definition without an `id`; callers add the id.
 */
export const createBestOfNImplementor2 = (options: {
  model: 'gpt-5' | 'opus' | 'sonnet'
}): Omit<SecretAgentDefinition, 'id'> => {
  const { model } = options
  const isGpt5 = model === 'gpt-5'
  const isOpus = model === 'opus'
  return {
    publisher,
    model: isGpt5
      ? 'openai/gpt-5.2'
      : isOpus
        ? 'anthropic/claude-opus-4.5'
        : 'anthropic/claude-sonnet-4.5',
    displayName: isGpt5
      ? 'GPT-5 Implementation Generator v2'
      : isOpus
        ? 'Opus Implementation Generator v2'
        : 'Sonnet Implementation Generator v2',
    spawnerPrompt:
      'Generates a complete implementation using propose_* tools that draft changes without applying them',

    includeMessageHistory: true,
    inheritParentSystemPrompt: true,

    toolNames: ['propose_write_file', 'propose_str_replace'],
    spawnableAgents: [],

    inputSchema: {},
    outputMode: 'structured_output',

    instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request.

Your task is to write out ALL the code changes needed to complete the user's request.

IMPORTANT: Use propose_str_replace and propose_write_file tools to make your edits. These tools draft changes without actually applying them - they will be reviewed first.

You can make multiple tool calls across multiple steps to complete the implementation.

After your edit tool calls, you can optionally mention any follow-up steps to take, like deleting a file, or a specific way to validate the changes.

Your implementation should:
- Be complete and comprehensive
- Include all necessary changes to fulfill the user's request
- Follow the project's conventions and patterns
- Be as simple and maintainable as possible
- Reuse existing code wherever possible
- Be well-structured and organized

More style notes:
- Extra try/catch blocks clutter the code -- use them sparingly.
- Optional arguments are code smell and worse than required arguments.
- New components often should be added to a new file, not added to an existing file.

Write out your complete implementation now.`,

    handleSteps: function* ({ agentState: startingState }) {
      // Everything at or beyond this index was appended during this agent's run.
      const baselineLength = startingState.messageHistory.length

      // True for an assistant message (with array content) that carries no
      // tool call and whose text parts, if any, are all blank.
      const isBlankAssistantMessage = (candidate: any): boolean => {
        if (
          candidate.role !== 'assistant' ||
          !Array.isArray(candidate.content)
        ) {
          return false
        }
        const parts: any[] = candidate.content
        if (parts.some((piece: any) => piece.type === 'tool-call')) {
          return false
        }
        // `every` on an empty list is true, which covers "no text parts at all".
        return parts
          .filter((piece: any) => piece.type === 'text')
          .every((piece: any) => !piece.text || !piece.text.trim())
      }

      const firstRun = yield 'STEP_ALL'
      let newMessages = firstRun.agentState.messageHistory.slice(baselineLength)

      // Give the model one more attempt when the first run produced nothing,
      // or produced only a single blank assistant message.
      const producedNothing =
        newMessages.length === 0 ||
        (newMessages.length === 1 && isBlankAssistantMessage(newMessages[0]))
      if (producedNothing) {
        const retryRun = yield 'STEP_ALL'
        newMessages = retryRun.agentState.messageHistory.slice(baselineLength)
      }

      // One pass over the new messages: tool calls come from assistant
      // messages, JSON results (expected to hold unified diffs) from tool
      // messages.
      const toolCalls: { toolName: string; input: any }[] = []
      const toolResults: any[] = []
      for (const message of newMessages) {
        if (message.role === 'assistant' && Array.isArray(message.content)) {
          for (const part of message.content) {
            if (part.type !== 'tool-call') continue
            toolCalls.push({
              toolName: part.toolName,
              // Accept either `input` or `args` as the property holding the arguments.
              input: part.input ?? (part as any).args ?? {},
            })
          }
        } else if (message.role === 'tool' && Array.isArray(message.content)) {
          for (const part of message.content) {
            if (part.type === 'json' && part.value) {
              toolResults.push(part.value)
            }
          }
        }
      }

      // Join every result that has a unified diff into one text blob,
      // each section headed by the file it applies to.
      const diffSections: string[] = []
      for (const result of toolResults) {
        if (result.unifiedDiff) {
          diffSections.push(`--- ${result.file} ---\n${result.unifiedDiff}`)
        }
      }
      const unifiedDiffs = diffSections.join('\n\n')

      yield {
        toolName: 'set_output',
        input: {
          toolCalls,
          toolResults,
          unifiedDiffs,
        },
        includeToolCall: false,
      }
    },
  }
}
// Default v2 implementor agent: the Opus-backed variant, registered under
// the id 'editor-implementor2'.
const opusImplementorBase = createBestOfNImplementor2({ model: 'opus' })
const definition = {
  ...opusImplementorBase,
  id: 'editor-implementor2',
}
export default definition
Loading