Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions internal/orchestrator/brain.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ func (p Plan) Validate() error {
return errors.New("plan workItems must contain at least one work item")
}
for _, action := range p.Actions {
if strings.TrimSpace(action.When) != "immediate" {
return errors.New("work-itemless plans may only contain immediate actions")
if !actionAllowedWithoutWorkItem(action) {
return errors.New("work-itemless plans may only contain immediate actions or worker-bound PR actions")
}
}
} else if err := validateWorkItemRequests(p.WorkItems); err != nil {
Expand All @@ -262,6 +262,21 @@ func (p Plan) Validate() error {
return nil
}

// actionAllowedWithoutWorkItem reports whether action may appear in a plan
// that carries no work items.
//
// Two shapes are accepted (all fields are compared after trimming
// surrounding whitespace):
//   - any action whose When is "immediate";
//   - a "publish_pull_request" or "update_pull_request" action whose When is
//     "after_success" and whose WorkerID is non-empty.
//
// Everything else is rejected.
func actionAllowedWithoutWorkItem(action PlanAction) bool {
	when := strings.TrimSpace(action.When)
	if when == "immediate" {
		return true
	}

	// Deferred actions are only acceptable when they are PR actions tied to
	// a specific worker.
	kind := strings.TrimSpace(action.Kind)
	isPullRequestAction := kind == "publish_pull_request" || kind == "update_pull_request"
	hasWorker := strings.TrimSpace(action.WorkerID) != ""

	return hasWorker && isPullRequestAction && when == "after_success"
}

func validateWorkItemRequests(items []WorkItemRequest) error {
ids := map[string]bool{}
for index, item := range items {
Expand Down
2 changes: 1 addition & 1 deletion internal/orchestrator/codex_brain.go
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ Field rules:
- "workPlan" item statuses should usually be "pending", "running", "blocked", "done", or "dropped". Keep ids stable across turns when they still refer to the same workstream.
- When action is "continue", "plan" must be an object with the same exact schema as the scheduler plan: reasoningEffort, rationale, workPlan, steps, requiredApprovals, actions, workItems.
- The top-level "workPlan" is the durable task update. The continue plan's "workPlan" exists because continue plans use the scheduler schema; it should match the top-level "workPlan" when you are changing the durable plan for the next turn. If the current durable plan remains accurate, include the current work plan in plan.workPlan and set top-level "workPlan" to null.
- The continue plan must use workItems for next-turn execution, or use immediate actions with an empty workItems array when no worker turn is needed. Each workItems[] object must include id, kind, reason, prompt, targetKind, targetId, workerKind, reasoningEffort, dependsOn, and metadata. Root work items with empty dependsOn can run in parallel immediately. Work items with dependencies wait until all dependency work item ids finish.
- The continue plan must use workItems for next-turn execution, or use an action-only plan when no worker turn is needed. Action-only plans may use immediate actions, or worker-bound publish_pull_request/update_pull_request actions with when "after_success" when the referenced worker has already completed and produced the PR-sized diff. Each workItems[] object must include id, kind, reason, prompt, targetKind, targetId, workerKind, reasoningEffort, dependsOn, and metadata. Root work items with empty dependsOn can run in parallel immediately. Work items with dependencies wait until all dependency work item ids finish.
- The continue plan may include actions. Use action kind "publish_pull_request" to publish a worker result as a durable PR artifact. A publish_pull_request action must include inputs.title and inputs.body; do not rely on aged to generate either one. inputs.title must describe the specific PR-sized change, not the overall task or broad objective. When a publish_pull_request or update_pull_request action pushes code changes, provide inputs.commitMessage when you can write a more precise commit subject than the PR title. inputs.commitMessage must be a short imperative or conventional-commit subject describing the code diff, such as "refactor(cron): remove saffron dependency" or "Widen cron schedule search horizon"; never use worker status narration such as "tests passed", "pushed changes", "doing final status", "ready to publish", or "opening a PR". Write inputs.body the same way a human contributor would write the PR description: describe what the code changes do and any notable behavior, API, or migration impact, and list the validation commands actually run, under "## Summary" and "## Test plan" or "## Validation" headings. Do not restate the user's task prompt, mention orchestration internals (worker ids, task ids, replan rationale, "candidate", "aged"), or include changed-file lists or diffstats; the PR diff already shows them. Use inputs.continueAfterPublish=true for broad, large, or long-running objectives when more slices should be pursued after opening this PR; after such an intermediate PR, the next plan should continue objective work immediately and leave the PR to the babysitter. Do not use wait_external or a standalone watch_pull_requests action merely because an intermediate PR was opened. Use action kind "create_tasks" only when a genuinely separate user-facing task should be created; do not use it for internal setup, investigation, benchmark harnesses, validation, or PR slices inside the current objective. 
Use action kind "update_pull_request" only for an existing non-terminal PR that is still the right review artifact. For update_pull_request that should push code changes, set workerId to the specific worker or work item id that produced the changes for that PR; do not rely on task-wide latest-result inference because broad objective tasks may have multiple active PRs. When update_pull_request references a worker that produced code changes, aged will push those worker changes to the PR branch by default even if inputs.title or inputs.body are also present. Set inputs.metadataOnly=true or inputs.includeChanges=false only when the action must update title/body without pushing worker workspace changes. If a PR is closed, treat it as historical feedback and publish a fresh PR for new worker output. Use action kind "watch_pull_requests" with when "immediate" when the user only wants to babysit existing PRs. Use "wait_external" when the task should pause for an external event that actually blocks further objective work. Use "ask_user" when the task needs user setup, credentials, permissions, VM changes, or another human-provided answer before continuing. Use "finish_objective" when a broad objective is done and no additional PR should be published; include inputs.summary when a concise completion summary would help the user.
- Plan actions must be objects with kind, when, reason, workerId, and inputs. Use when "after_success" for worker-result actions and "immediate" for standalone existing-PR watch tasks, user questions, or durable spawn_work fanout. For publish_pull_request and code-changing update_pull_request actions, set workerId to the specific worker or work item id that produced the coherent PR-sized diff. Use workerId "" only when the action is metadata-only or does not consume worker changes. Use inputs {} when no extra inputs are needed for non-publish actions.
- Use workItems for future objective work, broad fanout, PR slices, compose work, PR follow-up, CI repair, review replies, and work that should survive daemon restart. spawn_work remains available only as an explicit action/tool callback for action-only fanout.
Expand Down
54 changes: 54 additions & 0 deletions internal/orchestrator/codex_brain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,60 @@ func TestReplanDecisionAllowsFinishObjective(t *testing.T) {
}
}

// TestReplanDecisionAllowsWorkerBoundActionOnlyPublish verifies that a
// "continue" replan decision whose plan has an empty workItems array and a
// single after_success publish_pull_request action bound to a worker id
// decodes and passes Validate.
func TestReplanDecisionAllowsWorkerBoundActionOnlyPublish(t *testing.T) {
	const payload = `{
"action": "continue",
"rationale": "validated candidate should be published",
"plan": {
"rationale": "publish the already-completed validation worker result",
"steps": [],
"requiredApprovals": [],
"actions": [{
"kind": "publish_pull_request",
"when": "after_success",
"reason": "Publish the validated manager list slice.",
"workerId": "worker-1",
"inputs": {
"title": "Add compact manager objective rows",
"body": "## Summary\n- add compact manager objective rows\n\n## Validation\n- npm run build"
}
}],
"workItems": []
}
}`

	decision, decodeErr := decodeReplanDecision([]byte(payload))
	if decodeErr != nil {
		t.Fatal(decodeErr)
	}

	// The action-only plan must be accepted because the publish is worker-bound.
	validateErr := decision.Validate()
	if validateErr != nil {
		t.Fatalf("worker-bound publish-only replan decision rejected: %v", validateErr)
	}
}

// TestReplanDecisionRejectsUnboundActionOnlyDeferredPublish verifies that a
// "continue" replan decision with no work items and an after_success
// publish_pull_request action that names no worker fails Validate.
func TestReplanDecisionRejectsUnboundActionOnlyDeferredPublish(t *testing.T) {
	const payload = `{
"action": "continue",
"rationale": "invalid deferred publish",
"plan": {
"actions": [{
"kind": "publish_pull_request",
"when": "after_success",
"reason": "Publish something later.",
"inputs": {
"title": "Update manager console",
"body": "## Summary\n- update manager console"
}
}],
"workItems": []
}
}`

	decision, decodeErr := decodeReplanDecision([]byte(payload))
	if decodeErr != nil {
		t.Fatal(decodeErr)
	}

	// Decoding succeeds; only validation is expected to reject the plan.
	validateErr := decision.Validate()
	if validateErr == nil {
		t.Fatal("expected unbound deferred publish-only decision to be rejected")
	}
}

func TestCodexBrainReplanPromptCompactsLargeState(t *testing.T) {
brain := &CodexBrain{template: "schedule the work"}
results := make([]WorkerTurnResult, 120)
Expand Down
7 changes: 7 additions & 0 deletions internal/orchestrator/replan_prompt_budgeter.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ func (b ReplanPromptBudgeter) compactPullRequestFeedback(items []PullRequestFeed
func (b ReplanPromptBudgeter) compactArtifacts(artifacts []core.TaskArtifact) []ReplanPromptArtifact {
compact := make([]ReplanPromptArtifact, 0, len(artifacts))
for _, artifact := range artifacts {
if promptArtifactOmitted(artifact) {
continue
}
item := ReplanPromptArtifact{
ID: artifact.ID,
Kind: artifact.Kind,
Expand All @@ -275,6 +278,10 @@ func (b ReplanPromptBudgeter) compactArtifacts(artifacts []core.TaskArtifact) []
return compact
}

// promptArtifactOmitted reports whether artifact should be left out of the
// compacted replan prompt. An artifact is omitted when its Kind, with
// surrounding whitespace trimmed and compared case-insensitively, is
// "worker_log".
func promptArtifactOmitted(artifact core.TaskArtifact) bool {
	kind := strings.TrimSpace(artifact.Kind)
	return strings.EqualFold(kind, "worker_log")
}

func (b ReplanPromptBudgeter) degradeToTotalBudget(payload map[string]any, state ReplanPromptState) ReplanPromptState {
currentTokens := approxJSONTokens(payload)
for currentTokens > b.TotalTokens {
Expand Down
44 changes: 44 additions & 0 deletions internal/orchestrator/replan_prompt_budgeter_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package orchestrator

import (
"encoding/json"
"fmt"
"strings"
"testing"

"aged/internal/core"
)

func TestJSONArrayTokenSizerMatchesMarshalEstimate(t *testing.T) {
Expand Down Expand Up @@ -52,6 +55,47 @@ func TestCompactContextLedgerUsesBudget(t *testing.T) {
}
}

// TestCompactArtifactsOmitsWorkerLogs feeds compactArtifacts one worker_log
// artifact with large metadata content and one github_pull_request artifact,
// and checks that only the pull request survives and that nothing from the
// worker log shows up in the marshalled result.
func TestCompactArtifactsOmitsWorkerLogs(t *testing.T) {
	budgeter := DefaultReplanPromptBudgeter()

	workerLog := core.TaskArtifact{
		ID:   "stdout-1",
		Kind: "worker_log",
		Name: "Remote stdout",
		Ref:  "/home/bot/work/worker/stdout.log",
		Metadata: core.MustJSON(map[string]any{
			"bytes":   2_800_000,
			"content": strings.Repeat("remote stdout line\n", 10_000),
		}),
	}
	pullRequest := core.TaskArtifact{
		ID:   "pr-1",
		Kind: "github_pull_request",
		Name: "Add compact manager objective rows",
		URL:  "https://github.com/nathanwhit/aged/pull/123",
		Metadata: core.MustJSON(map[string]any{
			"number": 123,
		}),
	}

	got := budgeter.compactArtifacts([]core.TaskArtifact{workerLog, pullRequest})
	if len(got) != 1 {
		t.Fatalf("compact artifacts count = %d, want 1: %+v", len(got), got)
	}
	if kept := got[0]; kept.Kind != "github_pull_request" || kept.ID != "pr-1" {
		t.Fatalf("unexpected compact artifact: %+v", kept)
	}

	encoded, err := json.Marshal(got)
	if err != nil {
		t.Fatal(err)
	}
	// None of these fragments may appear: the first comes from the log
	// content itself, the others are field names checked by the original test.
	serialized := string(encoded)
	for _, fragment := range []string{"remote stdout", "contentPreview", "contentOmittedBytes"} {
		if strings.Contains(serialized, fragment) {
			t.Fatalf("worker log leaked into prompt artifacts: %s", encoded)
		}
	}
}

func BenchmarkCompactContextLedgerLarge(b *testing.B) {
budgeter := DefaultReplanPromptBudgeter()
budgeter.ContextLedgerTokens = 6000
Expand Down
Loading