From 97d6a2a500cff9db86426fadea5a67e2f381efda Mon Sep 17 00:00:00 2001
From: Hesham Salman <hsalman@makenotion.com>
Date: Fri, 20 Mar 2026 18:15:11 -0400
Subject: [PATCH] fix: embed completion protocol in bridge task prompt

The bridge's BuildTaskPrompt relied solely on --append-system-prompt-file
to inject the sentinel file instructions into task instances. If the
system prompt injection failed silently, instances had no knowledge of
the .claudio-task-complete.json convention and tasks would time out.

Embed the completion protocol directly in the user prompt as the primary
mechanism, with the system prompt injection as defense-in-depth.
---
 AGENTS.md                      |  1 +
 CHANGELOG.md                   |  1 +
 internal/bridge/AGENTS.md      |  3 ++-
 internal/bridge/bridge.go      | 46 +++++++++++++++++++++++++++++++---
 internal/bridge/bridge_test.go | 12 +++++++++
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index d854d70a..b2230715 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -344,6 +344,7 @@ These are real issues agents have encountered in this codebase. Package-specific
 - **Stale counter and waiting states** — The state monitor's `repeatedOutputCount` must not increment when the instance is in a waiting state (`IsWaiting()`). An instance at the `❯` prompt naturally has static output; this is idle behavior, not a stale loop. Similarly, `CheckTimeouts` must guard against firing `TimeoutStale` for waiting instances. Also, `Manager.Resume()` must call `ResetStaleCounter` to prevent ticks accumulated across prior active windows from carrying over after a tab switch. When adding new Claude Code UI elements (like `AskUserQuestion` menus), ensure the state detector recognizes them as waiting states — otherwise the static pane content will trigger a stale timeout. The `StripAnsi` function must also handle all escape sequences tmux emits (not just CSI/OSC), as unstripped `ESC(B` prefixes prevent `^❯` patterns from matching.
 - **Pause/resume symmetry in TUI update handlers** — When `HandleInstanceStubCreated` pauses the old active instance and switches to a new stub, all subsequent error paths (`HandleInstanceSetupComplete` setup failure, `StartInstance` failure) must call `ctx.ResumeActiveInstance()` to avoid leaving the previously-active instance permanently paused with a frozen display.
 - **Separate tracking for visible vs full captures** — The capture loop alternates between visible-only (cheap, no scrollback) and full (expensive, includes scrollback) tmux captures. Only full captures write to `outputBuf`. The change-detection variables must be independent (`lastVisibleOutput`, `lastFullOutput`) — a single shared variable causes cross-contamination where a visible capture sets the tracker, then the subsequent full capture (returning identical bytes when there's no scrollback) sees no change and skips the buffer write.
+- **Completion protocol must be in the user prompt, not just system prompt** — The bridge's `BuildTaskPrompt` must embed the sentinel file instructions directly in the task prompt. The `--append-system-prompt-file` injection in `bridgewire` provides defense-in-depth, but if it fails silently (wrong path, unsupported flag version, etc.), instances have no knowledge of the completion convention and tasks time out. The `completionFileName` constant in the bridge package is duplicated from `orchestrator/types.TaskCompletionFileName` to avoid import cycles — keep them in sync.
 
 ---
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c52c0ae1..8e0e80ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **Subprocess Mode** - Removed the experimental subprocess execution mode (`experimental.subprocess_mode`), including the `internal/streamjson/` package, `subprocessFactory`, and all related config/TUI/wiring plumbing. Pipeline instances now always use the tmux-based execution backend.
 
 ### Fixed
+- **Missing Sentinel File in Pipeline Execution** - Fixed task instances not writing `.claudio-task-complete.json` in the Orchestration 2.0 pipeline path. The bridge's `BuildTaskPrompt` relied solely on `--append-system-prompt-file` to inject the completion protocol, which left instances unaware of the sentinel file convention. The completion protocol is now embedded directly in the task prompt as defense-in-depth.
 - **Stale Display Early in Session** - Fixed capture loop never populating the output buffer when there's no scrollback (screen not yet full). A single `lastOutput` tracking variable was shared between visible-only and full captures; when a visible capture detected new content and set `lastOutput`, the subsequent forced full capture (which returns identical bytes when there's no scrollback) found no change and skipped the buffer write. Split into independent `lastVisibleOutput` and `lastFullOutput` so each capture type compares against its own history.
 - **Flaky `TestPipelineExecutor_E2E_AllPhases`** - Fixed race condition in `completeAllTeamTasks` test helper where `m.AllStatuses()` returned empty (teams not yet added) causing vacuous `allDone = true` and premature return without completing any tasks. The pipeline publishes `phase_changed` before `AddTeam`, so the test goroutine could race ahead of team registration (#685)
 - **Terminal Pane Alt+Backspace and Alt+Arrow Keys** - Fixed the terminal pane's key handler dropping the Alt modifier on Backspace and arrow keys, silently sending plain keystrokes instead of Alt-modified ones. The instance path already handled these correctly via `M-` prefix; the terminal path now sends `Escape` + base key to match its existing alt key pattern.
diff --git a/internal/bridge/AGENTS.md b/internal/bridge/AGENTS.md
index f413cb44..4ce30fcd 100644
--- a/internal/bridge/AGENTS.md
+++ b/internal/bridge/AGENTS.md
@@ -36,7 +36,8 @@ These interfaces are implemented by adapters in `internal/orchestrator/bridgewir
 
 ## Pitfalls
 
-- **Import cycle with ultraplan** — The `bridge` package must NOT import `ultraplan` or `orchestrator`. The chain `bridge → team → coordination → ... → ultraplan → orchestrator` creates a cycle if `orchestrator` imports `bridge`. Use simple types (strings, slices) rather than concrete domain types in the bridge API. The `BuildTaskPrompt` function accepts `(taskID, title, description string, files []string)` instead of `ultraplan.PlannedTask` for this reason.
+- **Import cycle with ultraplan** — The `bridge` package must NOT import `ultraplan` or `orchestrator`. The chain `bridge → team → coordination → ... → ultraplan → orchestrator` creates a cycle if `orchestrator` imports `bridge`. Use simple types (strings, slices) rather than concrete domain types in the bridge API. The `BuildTaskPrompt` function accepts `(taskID, title, description string, files []string)` instead of `ultraplan.PlannedTask` for this reason. The `completionFileName` constant is duplicated from `orchestrator/types.TaskCompletionFileName` for the same reason — keep them in sync manually.
+- **Completion protocol must be in the user prompt** — `BuildTaskPrompt` embeds the full completion protocol (sentinel file instructions) directly in the task prompt. The system prompt injection via `--append-system-prompt-file` in `bridgewire` provides defense-in-depth, but the user prompt is the primary mechanism. Without the user-prompt copy, instances have no knowledge of sentinel files if the system prompt injection fails silently.
 - **Event-driven wake pattern** — The claim loop subscribes to `queue.depth_changed` events and blocks on a buffered channel. Don't replace this with polling — the event-driven approach is more efficient and responsive.
 - **Gate.IsComplete exit condition** — The claim loop exits when there are no tasks and `gate.IsComplete()` returns true (all tasks terminal). Without this check, the loop would block forever waiting for new tasks that will never arrive.
 - **Publish events outside the lock** — `BridgeTaskStartedEvent` and `BridgeTaskCompletedEvent` are published outside the mutex to avoid deadlock with synchronous event handlers that might call back into the bridge.
diff --git a/internal/bridge/bridge.go b/internal/bridge/bridge.go
index 41b80bbb..fea3e412 100644
--- a/internal/bridge/bridge.go
+++ b/internal/bridge/bridge.go
@@ -442,14 +442,19 @@ func (b *Bridge) ActiveInstances() int {
 	return b.sem.Acquired()
 }
 
+// completionFileName is the sentinel file that task instances must write to
+// signal completion. This mirrors orchestrator/types.TaskCompletionFileName
+// but is defined here to avoid an import cycle (bridge must not import orchestrator).
+const completionFileName = ".claudio-task-complete.json"
+
 // BuildTaskPrompt formats task fields into a prompt string for a Claude Code instance.
 // Accepts basic fields rather than a concrete type to avoid import cycles.
 // The coordinator adapters may use the orchestrator's prompt.TaskBuilder for
 // richer formatting; this is the bridge-level default.
 //
-// The taskID is included so the instance can reference it in the completion file.
-// When orchestration instructions are injected via --append-system-prompt-file,
-// the instance needs the task ID to fill in the completion protocol JSON.
+// When taskID is non-empty the completion protocol is appended so the instance
+// knows to write the sentinel file. This is the primary mechanism — the system
+// prompt injection via --append-system-prompt-file provides defense-in-depth.
 func BuildTaskPrompt(taskID, title, description string, files []string) string {
 	var sb strings.Builder
 	sb.WriteString("# Task: ")
@@ -471,9 +476,44 @@ func BuildTaskPrompt(taskID, title, description string, files []string) string {
 		}
 	}
 
+	if taskID != "" {
+		writeCompletionProtocol(&sb, taskID)
+	}
+
 	return sb.String()
 }
 
+// writeCompletionProtocol appends the mandatory completion protocol to the prompt.
+// This instructs the Claude Code instance to write a sentinel JSON file when
+// done so the bridge's CompletionChecker can detect task completion.
+func writeCompletionProtocol(sb *strings.Builder, taskID string) {
+	sb.WriteString("\n\n## Completion Protocol - FINAL MANDATORY STEP\n\n")
+	sb.WriteString("**IMPORTANT**: Writing the completion file is your FINAL MANDATORY ACTION. ")
+	sb.WriteString("The orchestrator is BLOCKED waiting for this file. ")
+	sb.WriteString("Without it, your work will NOT be recorded and the workflow cannot proceed.\n\n")
+	sb.WriteString("**DO NOT** wait for user prompting or confirmation. ")
+	sb.WriteString("Write this file AUTOMATICALLY as soon as you have finished your implementation work and committed your changes.\n\n")
+	sb.WriteString("**CRITICAL**: Write this file at the ROOT of your worktree directory, not in any subdirectory.\n")
+	sb.WriteString("If you changed directories during the task (e.g., `cd project/`), use an absolute path or navigate back to the root first.\n\n")
+	fmt.Fprintf(sb, "1. Use Write tool to create `%s` in your worktree root\n", completionFileName)
+	sb.WriteString("2. Include this JSON structure:\n")
+	sb.WriteString("```json\n")
+	sb.WriteString("{\n")
+	fmt.Fprintf(sb, "  \"task_id\": \"%s\",\n", taskID)
+	sb.WriteString("  \"status\": \"complete\",\n")
+	sb.WriteString("  \"summary\": \"Brief description of what you accomplished\",\n")
+	sb.WriteString("  \"files_modified\": [\"list\", \"of\", \"files\", \"you\", \"changed\"],\n")
+	sb.WriteString("  \"notes\": \"Any implementation notes for the consolidation phase\",\n")
+	sb.WriteString("  \"issues\": [\"Any concerns or blocking issues found\"],\n")
+	sb.WriteString("  \"suggestions\": [\"Suggestions for integration with other tasks\"],\n")
+	sb.WriteString("  \"dependencies\": [\"Any new runtime dependencies added\"]\n")
+	sb.WriteString("}\n")
+	sb.WriteString("```\n\n")
+	sb.WriteString("3. Use status \"blocked\" if you cannot complete (explain in issues), or \"failed\" if something broke\n")
+	sb.WriteString("4. This file signals that your work is done and provides context for consolidation\n\n")
+	sb.WriteString("**REMEMBER**: Your task is NOT complete until you write this file. Do it NOW after finishing your work.\n")
+}
+
 // BuildTaskPromptWithContext builds a task prompt and appends prior discoveries
 // from context propagation. If priorContext is empty, it returns the same
 // result as BuildTaskPrompt.
diff --git a/internal/bridge/bridge_test.go b/internal/bridge/bridge_test.go
index 984ba934..e2719c12 100644
--- a/internal/bridge/bridge_test.go
+++ b/internal/bridge/bridge_test.go
@@ -875,6 +875,12 @@ func TestBuildTaskPrompt(t *testing.T) {
 	if !strings.Contains(prompt, "api/middleware.go") {
 		t.Error("prompt missing file list")
 	}
+	if !strings.Contains(prompt, "Completion Protocol") {
+		t.Error("prompt missing completion protocol")
+	}
+	if !strings.Contains(prompt, ".claudio-task-complete.json") {
+		t.Error("prompt missing sentinel file name")
+	}
 }
 
 func TestBuildTaskPrompt_NoFiles(t *testing.T) {
@@ -886,6 +892,9 @@ func TestBuildTaskPrompt_NoFiles(t *testing.T) {
 	if strings.Contains(prompt, "## Files") {
 		t.Error("prompt should not contain Files section when no files specified")
 	}
+	if !strings.Contains(prompt, "Completion Protocol") {
+		t.Error("prompt missing completion protocol")
+	}
 }
 
 func TestBuildTaskPrompt_EmptyTaskID(t *testing.T) {
@@ -894,4 +903,7 @@ func TestBuildTaskPrompt_EmptyTaskID(t *testing.T) {
 	if strings.Contains(prompt, "Task ID") {
 		t.Error("prompt should not contain Task ID section when taskID is empty")
 	}
+	if strings.Contains(prompt, "Completion Protocol") {
+		t.Error("prompt should not contain completion protocol when taskID is empty")
+	}
 }