diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..3ec40c8f
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,27 @@
+# EditorConfig — consistent formatting across editors
+# https://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.rs]
+indent_size = 4
+
+[*.md]
+trim_trailing_whitespace = false
+
+[*.{yml,yaml,json,toml}]
+indent_size = 2
+
+[*.sh]
+indent_size = 2
+
+[Makefile]
+indent_style = tab
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a0becaa3..97c620e7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,12 +5,20 @@ on:
branches: [main]
paths:
- "flowctl/**"
+ - "scripts/**"
+ - "skills/**/*.md"
+ - "agents/**/*.md"
- ".github/workflows/ci.yml"
+ - ".markdownlint.json"
pull_request:
branches: [main]
paths:
- "flowctl/**"
+ - "scripts/**"
+ - "skills/**/*.md"
+ - "agents/**/*.md"
- ".github/workflows/ci.yml"
+ - ".markdownlint.json"
env:
CARGO_TERM_COLOR: always
@@ -53,6 +61,66 @@ jobs:
- name: Clippy
run: cargo clippy --all-targets -- -D warnings
+ smoke-test:
+ name: Shell Smoke Tests
+ runs-on: ubuntu-latest
+ needs: [test]
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install Rust toolchain
+ uses: dtolnay/rust-toolchain@stable
+
+ - name: Cache cargo registry & build
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ flowctl/target
+ key: ${{ runner.os }}-cargo-${{ hashFiles('flowctl/Cargo.lock') }}
+ restore-keys: |
+ ${{ runner.os }}-cargo-
+
+ - name: Build flowctl
+ run: cd flowctl && cargo build --release
+
+ - name: Run smoke tests
+ run: bash scripts/smoke_test.sh
+
+ security-audit:
+ name: Dependency Security Audit
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install Rust toolchain
+ uses: dtolnay/rust-toolchain@stable
+
+ - name: Install cargo-audit
+ run: cargo install cargo-audit --locked
+
+ - name: Run cargo audit
+ working-directory: flowctl
+ run: cargo audit
+
+ markdown-lint:
+ name: Markdown Lint
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ with:
+ node-version: "20"
+
+ - name: Install markdownlint-cli2
+ run: npm install -g markdownlint-cli2
+
+ - name: Lint skill and agent markdown
+ run: markdownlint-cli2 "skills/**/*.md" "agents/**/*.md"
+
coverage:
name: Test Coverage
runs-on: ubuntu-latest
diff --git a/.markdownlint.json b/.markdownlint.json
new file mode 100644
index 00000000..3cc783d3
--- /dev/null
+++ b/.markdownlint.json
@@ -0,0 +1,15 @@
+{
+ "default": true,
+ "MD013": {
+ "line_length": 300,
+ "code_blocks": false,
+ "tables": false,
+ "headings": false
+ },
+ "MD024": {
+ "siblings_only": true
+ },
+ "MD033": false,
+ "MD041": false,
+ "MD046": false
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..b336ec12
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,23 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+
+## [0.1.31] - 2026-04-07
+
+### Added
+- Anti-rationalization sections to 4 core skills (plan, work, impl-review, epic-review)
+- Adversarial review iteration 2 findings addressed
+- Simplify-ignore hook improvements from adversarial review
+- Borrow agent skills patterns and anti-patterns documentation
+- README overhaul with architecture diagram, skill inventory table, and streamlined quick start
+- CHANGELOG.md (this file)
+
+### Changed
+- skill-create updated to enforce skill-anatomy.md template
+- README.md restructured for clarity: badges, one-liner, install, quick start, full skill inventory, architecture, commands reference, contributing, and license sections
+
+## [0.1.30] and earlier
+
+See [git log](https://github.com/z23cc/flow-code/commits/main) for full history.
diff --git a/README.md b/README.md
index b79691ef..4209c7ee 100644
--- a/README.md
+++ b/README.md
@@ -1,1878 +1,242 @@
-**[English](README.md)** | **[中文](README_CN.md)**
-
# Flow-Code
-[](../../LICENSE)
+[](CHANGELOG.md)
+[](LICENSE)
[](https://claude.ai/code)
+[](CHANGELOG.md)
-[](../../CHANGELOG.md)
-
-[](../../CHANGELOG.md)
-
-**A production-grade harness for Claude Code. Full-auto development from idea to PR.**
-
-**Zero external dependencies. Zero questions asked.**
+**Full-auto development from idea to PR. Zero dependencies. Zero questions asked.**
---
-### What is Harness Engineering?
-
-> *"The model is commodity; the harness is moat."* — [Anthropic](https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents), [OpenAI](https://openai.com/index/harness-engineering/), [Mitchell Hashimoto](https://mitchellh.com/writing/my-ai-adoption-journey)
-
-A **harness** wraps around an AI coding agent to handle everything the model can't do alone: state management, context bridging, quality gates, multi-agent coordination, and error recovery. Flow-Code is a complete harness for Claude Code.
-
-### How Flow-Code Compares
-
-| Capability | Flow-Code | [compound-engineering](https://github.com/EveryInc/compound-engineering-plugin) (12.5K⭐) | [claude-mem](https://github.com/thedotmack/claude-mem) (44K⭐) | [superpowers](https://github.com/anthropics/claude-plugins-official) |
-|---|---|---|---|---|
-| Task DAG + state machine | ✅ 37 commands, deps, split/skip | ❌ | ❌ | ❌ |
-| Parallel Teams + file lock | ✅ Agent Teams, atomic locks | ❌ | ❌ | ✅ parallel agents (no locks) |
-| Three-layer quality | ✅ guard + RP + Codex adversarial | ❌ | ❌ | ❌ |
-| Runtime DAG mutation | ✅ split/skip/dep rm mid-execution | ❌ | ❌ | ❌ |
-| Cross-model adversarial review | ✅ GPT tries to break Claude's code | ❌ | ❌ | ❌ |
-| Full-auto (zero questions) | ✅ AI decides branch/review/depth | ❌ | ❌ | ❌ |
-| Context preservation | ✅ PreCompact hook | ❌ | ✅ embedding + RAG | ❌ |
-| Auto draft PR | ✅ | ❌ | ❌ | ❌ |
-| Zero dependencies | ✅ single Rust binary + Bash skills | ❌ Node.js | ❌ ChromaDB | ❌ Node.js |
-
----
-
-## Table of Contents
-
-- [What Is This?](#what-is-this)
-- [Why It Works](#why-it-works)
-- [Quick Start](#quick-start) — Install, setup, use
-- [When to Use What](#when-to-use-what) — Interview vs Plan vs Work
-- [Agent Readiness Assessment](#agent-readiness-assessment) — `/flow-code:prime`
-- [Troubleshooting](#troubleshooting)
-- [Codebase Map](#codebase-map) — Architecture documentation via parallel subagents
-- [Auto-Improve](#auto-improve-autonomous-optimization) — Autonomous code optimization
-- [Ralph (Autonomous Mode)](#ralph-autonomous-mode) — Run overnight
-- [Features](#features) — Re-anchoring, multi-user, reviews, dependencies
-- [Commands](#commands) — All slash commands + flags
- - [Command Reference](#command-reference) — Detailed input docs for each command
-- [The Workflow](#the-workflow) — Planning and work phases
-- [.flow/ Directory](#flow-directory) — File structure
-- [flowctl CLI](#flowctl-cli) — Direct CLI usage
-
----
-
-## What Is This?
-
-Flow-Code is a **harness engineering framework** for Claude Code. One command goes from idea to draft PR — planning, parallel implementation, three-layer quality gates, and cross-model adversarial review, all fully automated.
+Flow-Code is a harness engineering plugin for Claude Code. One command takes you from an idea to a draft PR -- planning, parallel implementation, three-layer quality gates, and cross-model adversarial review, all fully automated.
```
/flow-code:plan "Add OAuth login"
- → AI research (adaptive scouts)
- → RP plan-review (code-aware)
- → Teams parallel workers (file locking)
- → guard per-commit (Layer 1)
- → Codex adversarial (Layer 3: GPT tries to break it)
- → auto push + draft PR
-```
-
-Everything lives in your repo as `.flow/` state. No external services. No global config. Single Rust binary + Bash skills. Uninstall: delete `.flow/`.
-
-
-
- |
- |
-
-
-| Planning: dependency-ordered tasks |
-Execution: fixes, evidence, review |
-
-
-
----
-
-## Epic-first task model
-
-Flow-Code does not support standalone tasks.
-
-Every unit of work belongs to an epic fn-N (even if it's a single task).
-
-Tasks are always fn-N.M and inherit context from the epic spec.
-
-Flow-Code always creates an epic container (even for one-offs) so every task has a durable home for context, re-anchoring, and automation. You never have to think about it.
-
-Rationale: keeps the system simple, improves re-anchoring, makes automation (Ralph) reliable.
-
-"One-off request" -> epic with one task.
-
----
-
-## Why It Works
-
-### Full-Auto by Default
-
-Say one sentence. Flow-Code plans, implements, tests, commits, and opens a draft PR — zero questions asked. AI reads git state and `.flow/` config to make all decisions (branch, review backend, research depth) autonomously.
-
-```bash
-# Full auto: plan → implement → test → commit → draft PR
-/flow-code:plan "add OAuth support"
-
-# Resume anytime — reads .flow state and continues from where it left off
-/flow-code:work fn-1
-
-# One task at a time for maximum control
-/flow-code:work fn-1.1
+ -> AI research (adaptive scouts)
+ -> RP plan-review (code-aware)
+ -> Teams parallel workers (file locking)
+ -> guard per-commit (Layer 1)
+ -> Codex adversarial (Layer 3: GPT tries to break it)
+ -> auto push + draft PR
```
-All modes get: re-anchoring before each task, evidence recording, file locking, cross-model review (if rp-cli available), and auto push + draft PR on completion.
-
-**Default: Teams mode** — Ready tasks (no unresolved dependencies) are automatically spawned as parallel Agent Team workers with file locking and SendMessage coordination. Single tasks run as a foreground worker with zero overhead. After each wave completes, a structured **Wave Checkpoint** runs: aggregate results, verify integration (guards + invariants), output a summary, then plan the next wave. Newly unblocked tasks become ready for the next batch.
-
-Workers also use **file-level Wave parallelism** within each task — when touching 3+ files, they issue parallel reads in one message, analyze dependencies at a checkpoint, then issue parallel edits. This achieves 3-4x speedup over sequential file I/O.
-
-**Three-layer review timing**: Layer 1 (guard) runs per-commit automatically. Layer 2 (RP plan-review) runs once during planning. Layer 3 (Codex adversarial) runs once when all tasks complete. No per-task review overhead — quality gates are at the right level.
-
-### No Context Length Worries
-
-- **Tasks sized at planning:** Every task is scoped to fit one work iteration
-- **Re-anchor every task:** Fresh context from `.flow/` specs before each task
-- **Survives compaction:** Re-anchors after conversation summarization too
-- **Fresh context in Ralph:** Each iteration starts with a clean context window
-
-Never worry about 200K token limits again.
-
-### Three-Layer Quality System
-
-Each layer catches different types of problems. No overlap, no waste:
-
-| Layer | Tool | When | What it catches |
-|-------|------|------|----------------|
-| **1. Guard** | `flowctl guard` (lint/type/test) | Every commit | Syntax, types, test failures |
-| **2. RP Plan-Review** | RepoPrompt context_builder | Plan phase | Spec-code inconsistency (RP sees full codebase) |
-| **3. Codex Adversarial** | `flowctl codex adversarial` | Epic completion | Security, concurrency, edge cases (different model family) |
-
-Guard is deterministic. RP validates against existing code. Codex (GPT) tries to **break** what Claude built — different architectures have different blind spots.
-
----
-
-### Zero Friction
-
-- **Works in 30 seconds.** Install the plugin, run a command. No setup.
-- **Non-invasive.** No CLAUDE.md edits. No daemons. (Ralph uses plugin hooks for enforcement.)
-- **Clean uninstall.** Delete `.flow/` (and `scripts/ralph/` if enabled).
-- **Multi-user safe.** Teams work parallel branches without coordination servers.
-
----
+Everything lives in your repo as `.flow/` state. No external services. No global config. Single Rust binary + Markdown skills.
-## Quick Start
-
-### 1. Install
+## Install
```bash
-# Add marketplace
/plugin marketplace add https://github.com/z23cc/flow-code
-
-# Install flow-code
/plugin install flow-code
```
-### 2. Setup (Recommended)
-
-```bash
-/flow-code:setup
-```
-
-This is technically optional but **highly recommended**. It:
-- **Configures review backend** (RepoPrompt, Codex, or none) — required for cross-model reviews
-- Copies `flowctl` to `.flow/bin/` for direct CLI access
-- Adds flow-code instructions to CLAUDE.md/AGENTS.md (helps other AI tools understand your project)
-- Creates `.flow/usage.md` with full CLI reference
+**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), git. Optional: [RepoPrompt](https://repoprompt.com) (macOS) or [Codex CLI](https://github.com/openai/codex) for cross-model reviews.
-**Idempotent** - safe to re-run. Detects plugin updates and refreshes scripts automatically.
-
-After setup:
-```bash
-export PATH=".flow/bin:$PATH"
-flowctl --help
-flowctl epics # List all epics
-flowctl tasks --epic fn-1 # List tasks for epic
-flowctl ready --epic fn-1 # What's ready to work on
-```
-
-### 3. Use
+## Quick Start
```bash
-# Spec: "create a spec for X" — writes epic with structured requirements
-# Then plan or interview to refine
-
-# Plan: research, create epic with tasks
-/flow-code:plan Add a contact form with validation
+# 1. Plan a feature (auto-executes by default)
+/flow-code:plan "add a contact form with validation"
-# Work: execute tasks in dependency order
+# 2. Or plan-only, then work separately
+/flow-code:plan "add OAuth support" --plan-only
/flow-code:work fn-1
-# Or work directly from a spec file (creates epic automatically)
-/flow-code:work docs/my-feature-spec.md
-```
-
-That's it. Flow-Code handles research, task ordering, reviews, and audit trails.
-
-### When to Use What
-
-Flow-next is flexible. There's no single "correct" order — the right sequence depends on how well-defined your spec already is.
-
-**The key question: How fleshed out is your idea?**
-
-#### Spec-driven (recommended for new features)
-
-```
-Create spec → Interview or Plan → Work
-```
-
-1. **Create spec** — ask Claude to "create a spec for X". This creates an epic with a structured spec (goal, architecture, API contracts, edge cases, acceptance criteria, boundaries, decision context) — no tasks yet
-2. **Refine or plan**:
- - `/flow-code:interview fn-1` — deep Q&A to pressure-test the spec, surface gaps
- - `/flow-code:plan fn-1` — research best practices + break into tasks
-3. **Work** — `/flow-code:work fn-1` executes with re-anchoring and reviews
-
-Best for: features where you want to nail down the WHAT/WHY before committing to HOW. The spec captures everything an implementer needs.
-
-#### Vague idea or rough concept
-
-```
-Interview → Plan → Work
-```
-
-1. **Interview first** — `/flow-code:interview "your rough idea"` asks 40+ deep questions to surface requirements, edge cases, and decisions you haven't thought about
-2. **Plan** — `/flow-code:plan fn-1` takes the refined spec and researches best practices, current docs, repo patterns, then splits into properly-sized tasks
-3. **Work** — `/flow-code:work fn-1` executes with re-anchoring and reviews
-
-#### Well-written spec or PRD
-
-```
-Plan → Interview → Work
-```
-
-1. **Plan first** — `/flow-code:plan specs/my-feature.md` researches best practices and current patterns, then breaks your spec into epic + tasks
-2. **Interview after** — `/flow-code:interview fn-1` runs deep questions against the plan to catch edge cases, missing requirements, or assumptions
-3. **Work** — `/flow-code:work fn-1` executes
-
-#### Minimal planning
-
-```
-Plan → Work
-```
-
-Skip interview entirely for well-understood changes. Plan still researches best practices and splits into tasks.
-
-#### Quick single-task (spec already complete)
-
-```
-Work directly
-```
-
-```bash
-/flow-code:work specs/small-fix.md
-```
-
-For small, self-contained changes where you already have a complete spec. Creates an epic with **one task** and executes immediately. You get flow tracking, re-anchoring, and optional review — without full planning overhead.
-
-Best for: bug fixes, small features, well-scoped changes that don't need task splitting.
-
-**Note:** This does NOT split into multiple tasks. For detailed specs that need breakdown, use Plan first.
-
-**Summary:**
-
-| Starting point | Recommended sequence |
-|----------------|---------------------|
-| New feature, want solid spec first | Spec → Interview/Plan → Work |
-| Vague idea, rough notes | Interview → Plan → Work |
-| Detailed spec/PRD | Plan → Interview → Work |
-| Well-understood, needs task splitting | Plan → Work |
-| Small single-task, spec complete | Work directly (creates 1 epic + 1 task) |
-
-**Spec vs Interview vs Plan:**
-- **Spec** (just ask "create a spec") creates an epic with structured requirements (goal, architecture, API contracts, edge cases, acceptance criteria, boundaries). No tasks, no codebase research.
-- **Interview** refines an epic via deep Q&A (40+ questions). Writes back to the epic spec only — no tasks.
-- **Plan** researches best practices, analyzes existing patterns, and creates sized tasks with dependencies.
-
-You can always run interview again after planning to catch anything missed. Interview writes back to the epic spec only — it won't modify existing tasks.
-
----
-
-## Agent Readiness Assessment
-
-> Inspired by [Factory.ai's Agent Readiness framework](https://factory.ai/news/agent-readiness)
-
-`/flow-code:prime` assesses your codebase for agent-readiness and proposes improvements. Works for greenfield and brownfield projects.
-
-### The Problem
-
-Agents waste cycles when codebases lack:
-- **Pre-commit hooks** → waits 10min for CI instead of 5sec local feedback
-- **Documented env vars** → guesses, fails, guesses again
-- **CLAUDE.md** → doesn't know project conventions
-- **Test commands** → can't verify changes work
-
-These are **environment problems**, not agent problems. Prime helps fix them.
-
-### Quick Start
-
-```bash
-/flow-code:prime # Full assessment + interactive fixes
-/flow-code:prime --report-only # Just show the report
-/flow-code:prime --fix-all # Apply all fixes without asking
-```
-
-### The Eight Pillars
-
-Prime evaluates your codebase across eight pillars (48 criteria total):
-
-#### Agent Readiness (Pillars 1-5) — Scored, Fixes Offered
-
-| Pillar | What It Checks |
-|--------|----------------|
-| **1. Style & Validation** | Linters, formatters, type checking, pre-commit hooks |
-| **2. Build System** | Build tool, commands, lock files, monorepo tooling |
-| **3. Testing** | Test framework, commands, verification, coverage, E2E |
-| **4. Documentation** | README, CLAUDE.md, setup docs, architecture |
-| **5. Dev Environment** | .env.example, Docker, devcontainer, runtime version |
-
-#### Production Readiness (Pillars 6-8) — Reported Only
-
-| Pillar | What It Checks |
-|--------|----------------|
-| **6. Observability** | Structured logging, tracing, metrics, error tracking, health endpoints |
-| **7. Security** | Branch protection, secret scanning, CODEOWNERS, Dependabot |
-| **8. Workflow & Process** | CI/CD, PR templates, issue templates, release automation |
-
-**Two-tier approach**: Pillars 1-5 determine your agent maturity level and are eligible for fixes. Pillars 6-8 are reported for visibility but no fixes are offered — these are team/production decisions.
-
-### Maturity Levels
-
-| Level | Name | Description | Overall Score |
-|-------|------|-------------|---------------|
-| 1 | Minimal | Basic project structure only | <30% |
-| 2 | Functional | Can build and run, limited docs | 30-49% |
-| 3 | **Standardized** | Agent-ready for routine work | 50-69% |
-| 4 | Optimized | Fast feedback loops, comprehensive docs | 70-84% |
-| 5 | Autonomous | Full autonomous operation capable | 85%+ |
-
-**Level 3 is the target** for most teams. It means agents can handle routine work: bug fixes, tests, docs, dependency updates.
-
-### How It Works
-
-1. **Parallel Assessment** — 9 haiku scouts run in parallel (~15-20 seconds):
-
- Agent Readiness scouts:
- - `tooling-scout` — linters, formatters, pre-commit, type checking
- - `claude-md-scout` — CLAUDE.md/AGENTS.md analysis
- - `env-scout` — environment setup
- - `testing-scout` — test infrastructure
- - `build-scout` — build system
- - `docs-gap-scout` — README, ADRs, architecture docs
-
- Production Readiness scouts:
- - `observability-scout` — logging, tracing, metrics, health endpoints
- - `security-scout` — GitHub API checks, CODEOWNERS, Dependabot
- - `workflow-scout` — CI/CD, templates, automation
-
-2. **Verification** — Verifies test commands actually work (e.g., `pytest --collect-only`)
-
-3. **Synthesize Report** — Calculates Agent Readiness score, Production Readiness score, and maturity level
-
-4. **Interactive Remediation** — Uses `AskUserQuestion` for agent readiness fixes only:
- ```
- Which tooling improvements should I add?
- ☐ Add pre-commit hooks (Recommended)
- ☐ Add linter config
- ☐ Add runtime version file
- ```
-
-5. **Apply Fixes** — Creates/modifies files based on your selections
-
-6. **Re-assess** — Optionally re-run to show improvement
-
-### Example Report
-
-```markdown
-# Agent Readiness Report
-
-**Repository**: my-project
-**Assessed**: 2026-01-23
-
-## Scores Summary
-
-| Category | Score | Level |
-|----------|-------|-------|
-| **Agent Readiness** (Pillars 1-5) | 73% | Level 4 - Optimized |
-| Production Readiness (Pillars 6-8) | 17% | — |
-| **Overall** | 52% | — |
-
-## Agent Readiness (Pillars 1-5)
-
-| Pillar | Score | Status |
-|--------|-------|--------|
-| Style & Validation | 67% (4/6) | ⚠️ |
-| Build System | 100% (6/6) | ✅ |
-| Testing | 67% (4/6) | ⚠️ |
-| Documentation | 83% (5/6) | ✅ |
-| Dev Environment | 83% (5/6) | ✅ |
-
-## Production Readiness (Pillars 6-8) — Report Only
-
-| Pillar | Score | Status |
-|--------|-------|--------|
-| Observability | 33% (2/6) | ❌ |
-| Security | 17% (1/6) | ❌ |
-| Workflow & Process | 0% (0/6) | ❌ |
-
-## Top Recommendations (Agent Readiness)
-
-1. **Tooling**: Add pre-commit hooks — 5 sec feedback vs 10 min CI wait
-2. **Tooling**: Add Python type checking — catch errors locally
-3. **Docs**: Update README — replace generic template
-```
-
-### Remediation Templates
-
-Prime offers fixes for agent readiness gaps (**not** team governance):
-
-| Fix | What Gets Created |
-|-----|-------------------|
-| CLAUDE.md | Project overview, commands, structure, conventions |
-| .env.example | Template with detected env vars |
-| Pre-commit (JS) | Husky + lint-staged config |
-| Pre-commit (Python) | `.pre-commit-config.yaml` |
-| Linter config | ESLint, Biome, or Ruff config (if none exists) |
-| Formatter config | Prettier or Biome config (if none exists) |
-| .nvmrc/.python-version | Runtime version pinning |
-| .gitignore entries | .env, build outputs, node_modules |
-
-Templates adapt to your project's detected conventions and existing tools. Won't suggest ESLint if you have Biome, etc.
-
-### User Consent Required
-
-**By default, prime asks before every change** using interactive checkboxes. You choose what gets created.
-
-- **Asks first** — uses `AskUserQuestion` tool for interactive selection per category
-- **Never overwrites** existing files without explicit consent
-- **Never commits** changes (leaves for you to review)
-- **Never deletes** files
-- **Merges** with existing configs when possible
-- **Respects** your existing tools (won't add ESLint if you have Biome)
-
-Use `--fix-all` to skip questions and apply everything. Use `--report-only` to just see the assessment.
-
-### Flags
-
-| Flag | Description |
-|------|-------------|
-| `--report-only` | Skip remediation, just show report |
-| `--fix-all` | Apply all recommendations without asking |
-| `` | Assess a different directory |
-
----
-
-### Full-Auto vs Interactive
-
-By default, everything is autonomous. Use `--interactive` only when you want to pause between tasks.
-
-| Mode | Trigger | Behavior |
-|------|---------|----------|
-| **Full-auto** (default) | `/flow-code:plan "idea"` | Plan → work → review → PR, zero questions |
-| **Interactive** | `--interactive` flag | Pauses after each task for human confirmation |
-| **Ralph** (multi-session) | `scripts/ralph/ralph.sh` | Fresh context per iteration, overnight runs |
-
-For large epics (>10 tasks), Ralph provides fresh context per session. See [Ralph Mode](#ralph-autonomous-mode) for setup.
-
----
-
-## Troubleshooting
-
-### Reset a stuck task
-
-```bash
-# Check task status
-flowctl show fn-1.2 --json
-
-# Reset to todo (from done/blocked)
-flowctl task reset fn-1.2
-
-# Reset + dependents in same epic
-flowctl task reset fn-1.2 --cascade
-```
-
-### Clean up `.flow/` safely
-
-Run manually in terminal (not via AI agent):
-
-```bash
-# Remove all flow state (keeps git history)
-rm -rf .flow/
-
-# Re-initialize
-flowctl init
-```
-
-### Debug Ralph runs
-
-```bash
-# Check run progress
-cat scripts/ralph/runs/*/progress.txt
-
-# View iteration logs
-ls scripts/ralph/runs/*/iter-*.log
-
-# Check for blocked tasks
-ls scripts/ralph/runs/*/block-*.md
-```
-
-### Receipt validation failing
-
-```bash
-# Check receipt exists
-ls scripts/ralph/runs/*/receipts/
+# 3. Resume anytime -- reads .flow state and continues
+/flow-code:work fn-1
-# Verify receipt format
-cat scripts/ralph/runs/*/receipts/impl-fn-1.1.json
-# Must have: {"type":"impl_review","id":"fn-1.1",...}
+# 4. Optional setup for review backends + local CLI
+/flow-code:setup
```
-### Custom rp-cli instructions conflicting
-
-> **Caution**: If you have custom instructions for `rp-cli` in your `CLAUDE.md` or `AGENTS.md`, they may conflict with Flow-Code's RepoPrompt integration.
+That's it. Flow-Code handles research, task ordering, parallel execution, reviews, and opens a draft PR when done.
-Flow-Code's plan-review and impl-review skills include specific instructions for `rp-cli` usage (window selection, builder workflow, chat commands). Custom rp-cli instructions can override these and cause unexpected behavior.
-
-**Symptoms:**
-- Reviews not using the correct RepoPrompt window
-- Builder not selecting expected files
-- Chat commands failing or behaving differently
-
-**Fix:** Remove or comment out custom rp-cli instructions from your `CLAUDE.md`/`AGENTS.md` when using Flow-Code reviews. The plugin provides complete rp-cli guidance.
-
----
+## Architecture
-## Codebase Map
+```mermaid
+graph TD
+ A["/flow-code:plan"] --> B["Research Scouts<br/>(parallel subagents)"]
+ B --> C[".flow/ epic + tasks + deps"]
+ C --> D["/flow-code:work"]
+ D --> E["Teams Mode<br/>(parallel workers + file locking)"]
+ E --> F["Layer 1: guard<br/>(lint/type/test per commit)"]
+ F --> G["Layer 3: Codex adversarial<br/>(GPT tries to break it)"]
+ G --> H["Auto push + draft PR"]
-Generate comprehensive architecture documentation using parallel Sonnet subagents.
+ I["/flow-code:plan-review"] -.-> C
+ J["/flow-code:impl-review"] -.-> F
+ K["/flow-code:epic-review"] -.-> G
-```bash
-/flow-code:map
+ style A fill:#4a9eff,color:#fff
+ style D fill:#4a9eff,color:#fff
+ style H fill:#2ecc71,color:#fff
```
-Creates `docs/CODEBASE_MAP.md` with:
-- Architecture diagram (Mermaid)
-- Module guide (purpose, exports, dependencies per file)
-- Data flow diagrams
-- Conventions and gotchas
-- Navigation guide ("To add an API endpoint: touch these files")
+**Core engine:** `flowctl` is a Rust binary with libSQL storage. Skills and agents are Markdown files loaded by Claude Code's Skill tool. No Node.js, no npm, no external services.
-**How it works:**
-1. Scans file tree with token counts (respects .gitignore)
-2. Splits work into ~150k token chunks
-3. Spawns Sonnet subagents in parallel to analyze each chunk
-4. Synthesizes reports into a single map document
-
-**Update mode** — re-run to update only changed modules:
-```bash
-/flow-code:map --update
```
-
-**Integrated with flow-code workflow:**
-- `repo-scout` reads the map first during planning (faster, more accurate)
-- `auto-improve` reads the map before each experiment (better context)
-- `context-scout` benefits from architecture overview
-
-Based on [Cartographer](https://github.com/kingbootoshi/cartographer) (MIT).
-
----
-
-## Auto-Improve (Autonomous Optimization)
-
-> Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch) — 700 experiments in 2 days, 19% performance gain at Shopify.
-
-One command to start autonomous code improvement. Auto-detects project type, guard commands, and runs immediately.
-
-```bash
-/flow-code:auto-improve "fix N+1 queries and add missing tests" --scope src/
+commands/flow-code/*.md -> Slash command definitions (user entry points)
+skills/*/SKILL.md -> Skill implementations (24 skills)
+agents/*.md -> Subagent definitions (24 agents)
+bin/flowctl -> Rust binary (built from flowctl/ workspace)
+hooks/hooks.json -> Ralph workflow guards (active when FLOW_RALPH=1)
```
-That's it. Flow-Code detects your project (Django/React/Next.js), finds lint+test commands, creates an experiment branch, and starts improving. Each experiment: discover → implement → test → keep or discard.
-
-**More examples:**
-```bash
-# Next.js bundle optimization
-/flow-code:auto-improve "reduce bundle size" --scope src/components/ --max 20
-
-# Security hardening
-/flow-code:auto-improve "fix security vulnerabilities" --scope src/api/ src/auth/
-
-# Test coverage
-/flow-code:auto-improve "improve test coverage to 80%"
-
-# Watch mode (see what agent is doing)
-/flow-code:auto-improve "optimize API performance" --scope src/ --watch
-```
+## Commands
-**How it works:**
-```
-for each experiment (up to --max, default 50):
- 1. Agent reads code + previous experiments (learns from history)
- 2. Discovers ONE improvement opportunity
- 3. Writes test first (TDD style)
- 4. Implements minimal change (scope-restricted)
- 5. Runs guard (auto-detected lint + tests must pass)
- 6. Judges: keep (git commit) or discard (git reset)
- 7. Logs to experiments.jsonl → summary.md at end
-```
+| Command | What It Does |
+|---------|--------------|
+| `/flow-code:plan <idea>` | Research codebase, create epic with dependency-ordered tasks |
+| `/flow-code:work <id-or-file>` | Execute epic/task/spec file with re-anchoring before each task |
+| `/flow-code:interview <id>` | Deep Q&A (40+ questions) to refine a spec before planning |
+| `/flow-code:plan-review <id>` | Carmack-level plan review via RepoPrompt or Codex |
+| `/flow-code:impl-review` | Carmack-level implementation review of current branch |
+| `/flow-code:epic-review <id>` | Verify implementation matches spec before closing |
+| `/flow-code:debug` | Systematic debugging: root cause investigation |
+| `/flow-code:prime` | Assess codebase agent-readiness, propose fixes |
+| `/flow-code:sync <id>` | Update downstream task specs after implementation drift |
+| `/flow-code:retro` | Post-epic retrospective: lessons learned |
+| `/flow-code:ralph-init` | Scaffold autonomous Ralph harness |
+| `/flow-code:django` | Django-specific patterns, security, testing |
+| `/flow-code:skill-create` | Create new flow-code skills |
+| `/flow-code:setup` | Install flowctl locally + configure review backend |
+| `/flow-code:uninstall` | Remove flow-code from project |
+
+**Flags:** All commands accept flags (`--research=rp|grep`, `--review=rp|codex|none`, `--branch=current|new|worktree`, `--interactive`, `--tdd`, `--plan-only`, `--no-pr`). Natural language also works: `/flow-code:plan Add webhooks, use context-scout, skip review`.
+
+## Skill Inventory
+
+### Core Skills (8)
+
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| `flow-code` | `/flow-code` | Task/epic management (list, create, status) |
+| `flow-code-plan` | `/flow-code:plan` | Create structured build plans from descriptions |
+| `flow-code-work` | `/flow-code:work` | Execute plans with Teams mode (parallel workers + file locking) |
+| `flow-code-plan-review` | `/flow-code:plan-review` | Carmack-level plan review via RepoPrompt or Codex |
+| `flow-code-impl-review` | `/flow-code:impl-review` | Post-implementation code review |
+| `flow-code-epic-review` | `/flow-code:epic-review` | Final review before closing an epic |
+| `flow-code-setup` | `/flow-code:setup` | Install flowctl CLI and configure project |
+| `flow-code-map` | `/flow-code:map` | Generate codebase architecture maps via parallel subagents |
+
+### Extension Skills -- Development (4)
+
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| `flow-code-debug` | `/flow-code:debug` | Systematic debugging with root cause investigation |
+| `flow-code-auto-improve` | `/flow-code:auto-improve` | Autonomous code quality improvement loops |
+| `flow-code-django` | `/flow-code:django` | Django-specific patterns, security, and testing |
+| `flow-code-deps` | `/flow-code:deps` | Dependency graph visualization and execution order |
+
+### Extension Skills -- Workflow (4)
+
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| `flow-code-interview` | `/flow-code:interview` | Refine specs through structured Q&A (40+ questions) |
+| `flow-code-sync` | `/flow-code:sync` | Sync downstream task specs after implementation drift |
+| `flow-code-retro` | `/flow-code:retro` | Post-epic retrospective and lessons learned |
+| `flow-code-prime` | `/flow-code:prime` | Assess codebase readiness for agent work |
+
+### Extension Skills -- Tooling (8)
+
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| `flow-code-ralph-init` | `/flow-code:ralph-init` | Scaffold autonomous Ralph harness |
+| `flow-code-loop-status` | `/flow-code:loop-status` | Monitor running Ralph/auto-improve loops |
+| `flow-code-worktree-kit` | `/flow-code:worktree-kit` | Git worktree management for parallel work |
+| `flow-code-export-context` | `/flow-code:export-context` | Export context for external model review |
+| `flow-code-rp-explorer` | `/flow-code:rp-explorer` | RepoPrompt-powered codebase exploration |
+| `flow-code-skill-create` | `/flow-code:skill-create` | Create new flow-code skills |
+| `flow-code-prompt-eng` | Internal | Prompt engineering guidance for review agents |
+| `browser` | `/browser` | Browser automation via agent-browser CLI |
+
+## How It Works
-**What's auto-detected:**
+### Full-Auto by Default
-| Project | Guard command |
-|---------|--------------|
-| Django + ruff | `ruff check . && python -m pytest -x -q` |
-| Django + pytest | `python -m pytest -x -q` |
-| Next.js/React | `npm run lint && npm test` |
-| No tests found | Warning — set `GUARD_CMD` in config.env |
+Say one sentence. Flow-Code plans, implements, tests, commits, and opens a draft PR -- zero questions asked. The AI reads git state and `.flow/` config to make all decisions autonomously.
-**Customization:**
-- `scripts/auto-improve/program.md` — edit to change improvement focus and judgment criteria
-- `scripts/auto-improve/config.env` — override goal, scope, guard, max experiments
+**Default mode: Teams + Phase-Gate.** Ready tasks are spawned as parallel Agent Team workers with file locking and SendMessage coordination. After each wave, a structured checkpoint verifies integration before the next batch.
-**Output:**
-- `experiments.jsonl` — every experiment logged (hypothesis, result, commit)
-- `summary.md` — generated at end with kept/discarded/crashed counts
-- Kept improvements committed on `auto-improve/` branch
+### Three-Layer Quality System
-**Using with Codex CLI:**
-```bash
-# Set CLAUDE_BIN to use Codex instead of Claude
-CLAUDE_BIN=codex scripts/auto-improve/auto-improve.sh
+Each layer catches different types of problems:
-# Or set in config.env for persistent use
-# CLAUDE_BIN=codex
-# AUTO_IMPROVE_CODEX_MODEL=gpt-5.4
-```
+| Layer | Tool | When | What It Catches |
+|-------|------|------|----------------|
+| **1. Guard** | `flowctl guard` | Every commit | Syntax, types, test failures |
+| **2. RP Plan-Review** | RepoPrompt context_builder | Plan phase | Spec-code inconsistency |
+| **3. Codex Adversarial** | `flowctl codex adversarial` | Epic completion | Security, concurrency, edge cases |
-Auto-improve auto-detects the CLI type and uses the correct flags (Claude: `-p --output-format stream-json`, Codex: `-q --full-auto`).
+Guard is deterministic. RP validates against existing code. Codex (GPT) tries to **break** what Claude built -- different model families have different blind spots.
-**Ralph vs Auto-Improve:**
-| | Ralph | Auto-Improve |
-|---|---|---|
-| Purpose | Execute planned tasks | Explore & optimize |
-| Input | Epic with spec + tasks | Goal + scope |
-| Approach | Follow plan exactly | Discover improvements |
-| Output | Completed features | Incremental code improvements |
-| When | You know WHAT to build | You want code to get BETTER |
+### Re-Anchoring
----
+Before every task, Flow-Code re-reads the epic spec and task spec from `.flow/`, plus the current git state. No hallucinated scope creep, no forgotten requirements. Because the source of truth lives on disk, this survives context compaction.
-## Uninstall
+### Ralph (Autonomous Mode)
-Run manually in terminal (DCG blocks these from AI agents):
+Ralph is the repo-local autonomous loop for overnight runs. It provides fresh context per iteration, multi-model review gates, receipt-based gating, and scope freeze for safety.
```bash
-rm -rf .flow/ # Core flow state
-rm -rf scripts/ralph/ # Ralph (if enabled)
+/flow-code:ralph-init # Scaffold (one-time)
+scripts/ralph/ralph_once.sh # One iteration (observe)
+scripts/ralph/ralph.sh # Full loop (AFK)
+scripts/ralph/ralph.sh --watch # Stream tool calls in real-time
```
-Or use `/flow-code:uninstall` which cleans up docs and prints commands to run.
+### Cross-Model Reviews
----
+Two models catch what one misses. Reviews use a second model (RepoPrompt or Codex CLI) to verify plans and implementations. Carmack-level criteria: Completeness, Feasibility, Architecture, Security, Testability.
-## Ralph (Autonomous Mode)
+| Backend | Platform | Best For |
+|---------|----------|----------|
+| [RepoPrompt](https://repoprompt.com) | macOS | Best context, visual builder, deeper codebase discovery |
+| [Codex CLI](https://github.com/openai/codex) | All | Cross-platform, terminal-based, session continuity |
-> **⚠️ Safety first**: Ralph defaults to `YOLO=1` (skips permission prompts).
-> - Start with `ralph_once.sh` to observe one iteration
-> - Consider [Docker sandbox](https://docs.docker.com/ai/sandboxes/claude-code/) for isolation
-> - Consider [DCG (Destructive Command Guard)](https://github.com/Dicklesworthstone/destructive_command_guard) to block destructive commands — see [DCG setup](docs/ralph.md#additional-safety-dcg-optional)
->
-> **Community sandbox setups** (alternative approaches):
-> - [devcontainer-for-claude-yolo-and-flow-code](https://github.com/Ranudar/devcontainer-for-claude-yolo-and-flow-code) — VS Code devcontainer with Playwright, firewall whitelisting, and RepoPrompt MCP bridge
-> - [agent-sandbox](https://github.com/novotnyllc/agent-sandbox) — Docker Sandbox (Desktop 4.50+) with seccomp/user namespace isolation, .NET + Node.js
+### Other Platforms
-Ralph is the repo-local autonomous loop that plans and works through tasks end-to-end.
+| Platform | Install | Notes |
+|----------|---------|-------|
+| **Claude Code** | `/plugin install flow-code` | Primary platform |
+| **Factory Droid** | `/plugin install flow-code` | Native support, uses `${DROID_PLUGIN_ROOT}` fallback |
+| **OpenAI Codex** | `./scripts/install-codex.sh` | Commands use `/prompts:` prefix |
-**Setup (one-time, inside Claude):**
-```bash
-/flow-code:ralph-init
-```
+## `.flow/` Directory
-Or from terminal without entering Claude:
-```bash
-claude -p "/flow-code:ralph-init"
```
-
-**Run (outside Claude):**
-```bash
-scripts/ralph/ralph.sh
+.flow/
+ meta.json # Schema version
+ config.json # Project settings
+ epics/
+ fn-1-add-oauth.json # Epic metadata (id, title, status, deps)
+ specs/
+ fn-1-add-oauth.md # Epic spec (plan, scope, acceptance)
+ tasks/
+ fn-1-add-oauth.1.json # Task metadata (id, status, priority, deps)
+ fn-1-add-oauth.1.md # Task spec (description, acceptance, done summary)
+ memory/ # Persistent learnings (opt-in)
```
-Ralph writes run artifacts under `scripts/ralph/runs/`, including review receipts used for gating.
-
-📖 **[Ralph deep dive](docs/ralph.md)**
+To uninstall, delete `.flow/` (and `scripts/ralph/` if you enabled Ralph), or run `/flow-code:uninstall`.
-🖥️ **[Ralph TUI](../../flow-code-tui/)** — Terminal UI for monitoring runs in real-time (`bun add -g flow-code-tui`)
-
-### How Ralph Differs from Other Autonomous Agents
-
-Autonomous coding agents are taking the industry by storm—loop until done, commit, repeat. Most solutions gate progress by tests and linting alone. Ralph goes further.
-
-**Multi-model review gates**: Ralph uses [RepoPrompt](https://repoprompt.com) (macOS) or OpenAI Codex CLI (cross-platform) to send plan and implementation reviews to a *different* model. A second set of eyes catches blind spots that self-review misses. RepoPrompt's builder provides full file context; Codex uses context hints from changed files.
-
-**Review loops until Ship** (max 2 iterations): Reviews block progress until resolved. Fix → re-review cycles run until `SHIP` verdict or iteration limit (prevents infinite loops from diminishing-returns fixes).
-
-**Receipt-based gating**: Reviews must produce a receipt JSON file proving they ran. No receipt = no progress. This prevents drift where Claude skips the review step and marks things done anyway.
-
-**Guard hooks**: Plugin hooks enforce workflow rules deterministically—blocking `--json` flags, preventing new chats on re-reviews, requiring receipts before stop. Only active when `FLOW_RALPH=1`; zero impact for non-Ralph users. See [Guard Hooks](docs/ralph.md#guard-hooks).
-
-**Atomic window selection**: The `setup-review` command handles RepoPrompt window matching atomically. Claude can't skip steps or invent window IDs—the entire sequence runs as one unit or fails.
-
-The result: code that's been reviewed by two models, tested, linted, and iteratively refined. Not perfect, but meaningfully more robust than single-model autonomous loops.
-
-### Controlling Ralph
-
-External agents (Clawdbot, GitHub Actions, etc.) can pause/resume/stop Ralph runs without killing processes.
-
-**CLI commands:**
-```bash
-# Check status
-flowctl status # Epic/task counts + active runs
-flowctl status --json # JSON for automation
-
-# Control active run
-flowctl ralph pause # Pause run (auto-detects if single)
-flowctl ralph resume # Resume paused run
-flowctl ralph stop # Request graceful stop
-flowctl ralph status # Show run state
-
-# Specify run when multiple active
-flowctl ralph pause --run <id>
-```
-
-**Sentinel files (manual control):**
-```bash
-# Pause: touch PAUSE file in run directory
-touch scripts/ralph/runs/<run-id>/PAUSE
-# Resume: remove PAUSE file
-rm scripts/ralph/runs/<run-id>/PAUSE
-# Stop: touch STOP file (kept for audit)
-touch scripts/ralph/runs/<run-id>/STOP
-```
+## Contributing
-Ralph checks sentinels at iteration boundaries (after Claude returns, before next iteration).
+1. Fork the repository
+2. Create a feature branch
+3. Run tests: `cd flowctl && cargo build --release && cargo test --all`
+4. Run smoke tests from the repository root: `bash scripts/smoke_test.sh`
+5. Submit a PR
-### Review Mode (Three-Layer Quality)
+See [docs/skills.md](docs/skills.md) for the skill classification and [CLAUDE.md](CLAUDE.md) for development conventions.
-Ralph uses the same three-layer quality system as interactive mode:
+## License
-```
-plan → RP plan-review (Layer 2)
-task 1 → guard ✓ (Layer 1)
-task 2 → guard ✓
-task N → guard ✓
-all done → Codex adversarial (Layer 3)
-→ push + draft PR
-```
-
-**Configure in `scripts/ralph/config.env`:**
-
-```bash
-# Review backend (rp = RepoPrompt, codex = Codex CLI, none = skip)
-WORK_REVIEW=rp
-```
-
-**Common configurations:**
-
-```bash
-# Fast iteration with quality gate (recommended)
-REVIEW_MODE=per-epic
-WORK_REVIEW=rp
-
-# Maximum speed, no reviews
-REVIEW_MODE=per-epic
-WORK_REVIEW=none
-COMPLETION_REVIEW=none
-
-# Strict mode, review everything
-REVIEW_MODE=per-task
-WORK_REVIEW=rp
-COMPLETION_REVIEW=rp
-```
-
-**Monitoring:**
-
-```bash
-# Watch Ralph run in real-time
-scripts/ralph/ralph.sh --watch
-
-# View run logs
-tail -f scripts/ralph/runs/latest/ralph.log
-
-# Check progress
-scripts/ralph/flowctl list
-```
-
-### Scope Isolation (Freeze Scope)
-
-When running Ralph overnight, external changes to the backlog can cause unexpected behavior — new tasks picked up without review, removed tasks causing confusion, modified specs invalidating assumptions.
-
-**Configure in `scripts/ralph/config.env`:**
-
-```bash
-# Capture task IDs + spec hashes at start, check each iteration
-FREEZE_SCOPE=1
-
-# What to do on scope change: stop | warn | ignore
-SCOPE_CHANGE_ACTION=stop
-```
-
-**What it detects:**
-
-| Change Type | Detection | Outcome |
-|-------------|-----------|---------|
-| Task added externally | Task ID not in frozen list | SCOPE_CHANGED |
-| Task removed externally | Frozen task ID missing | SCOPE_CHANGED |
-| Spec content modified | MD5 hash mismatch | SCOPE_CHANGED |
-| Status change (todo→done) | Not tracked | Allowed (normal) |
-
-**Actions:**
-
-| Action | Behavior |
-|--------|----------|
-| `stop` | Halt Ralph with exit code 1 and clear message |
-| `warn` | Log changes, display warning, continue execution |
-| `ignore` | Log changes silently, continue execution |
-
-**Files created in `$RUN_DIR/scope/`:**
-
-| File | Content |
-|------|---------|
-| `scope.json` | Full snapshot (task IDs, statuses, spec hashes) |
-| `task_ids.txt` | Sorted task IDs for easy diff |
-| `hashes.txt` | `id:md5hash` pairs for specs and tasks |
-| `changes-iter-NNN.txt` | Detected changes per iteration (if any) |
-
-**Recommended for overnight runs:**
-```bash
-FREEZE_SCOPE=1
-SCOPE_CHANGE_ACTION=stop # Safe: halt on external changes
-```
-
-**For monitored runs:**
-```bash
-FREEZE_SCOPE=1
-SCOPE_CHANGE_ACTION=warn # Continue but flag changes
-```
-
-### Structured Logging
-
-Ralph writes structured JSON event logs to `$RUN_DIR/events.jsonl` for easy parsing and analysis. Each line is a JSON object:
-
-```json
-{"ts":"2026-03-26T12:00:00.123Z","level":"info","event":"run_start","run_id":"20260326-120000-a1b2","max_iterations":25,"review_mode":"per-epic"}
-{"ts":"2026-03-26T12:01:15.456Z","level":"info","event":"iteration","iter":1,"status":"work","task":"fn-1.1"}
-{"ts":"2026-03-26T12:05:30.789Z","level":"info","event":"worker_done","iter":1,"exit_code":0,"timeout":false}
-{"ts":"2026-03-26T12:30:00.000Z","level":"info","event":"run_end","reason":"NO_WORK","tasks_done":5,"elapsed":"29:00"}
-```
-
-**Query examples:**
-```bash
-# Count iterations per status
-jq -r 'select(.event=="iteration") | .status' events.jsonl | sort | uniq -c
-
-# Find failed workers
-jq 'select(.event=="worker_done" and .exit_code!=0)' events.jsonl
-
-# Total run time
-jq -r 'select(.event=="run_end") | .elapsed' events.jsonl
-```
-
-The plain-text `progress.txt` log still exists for backwards compatibility. Use `events.jsonl` for automation and analysis.
-
-**Task retry/rollback:**
-```bash
-# Reset completed/blocked task to todo
-flowctl task reset fn-1-add-oauth.3
-
-# Reset + cascade to dependent tasks (same epic)
-flowctl task reset fn-1-add-oauth.2 --cascade
-```
-
----
-
-## Human-in-the-Loop Workflow (Detailed)
-
-Default flow when you drive manually:
-
-```mermaid
-flowchart TD
- A[Idea or short spec<br/>prompt or doc] --> B{Need deeper spec?}
- B -- yes --> C[Optional: /flow-code:interview fn-N or spec.md<br/>40+ deep questions to refine spec]
- C --> D[Refined spec]
- B -- no --> D
- D --> E[/flow-code:plan idea or fn-N/]
- E --> F[Parallel subagents: repo patterns + online docs + best practices]
- F --> G[flow-gap-analyst: edge cases + missing reqs]
- G --> H[Writes .flow/ epic + tasks + deps]
- H --> I{Plan review?}
- I -- yes --> J[/flow-code:plan-review fn-N/]
- J --> K{Plan passes review?}
- K -- no --> L[Re-anchor + fix plan]
- L --> J
- K -- yes --> M[/flow-code:work fn-N/]
- I -- no --> M
- M --> N[Re-anchor before EVERY task]
- N --> O[Implement]
- O --> P[Test + verify acceptance]
- P --> Q[flowctl done: write done summary + evidence]
- Q --> R{Impl review?}
- R -- yes --> S[/flow-code:impl-review/]
- S --> T{Next ready task?}
- R -- no --> T
- T -- yes --> N
- T -- no --> V{Epic review?}
- V -- yes --> W[/flow-code:epic-review fn-N/]
- W --> X{Epic passes review?}
- X -- no --> Y[Fix gaps inline]
- Y --> W
- X -- yes --> U[Close epic]
- V -- no --> U
- classDef optional stroke-dasharray: 6 4,stroke:#999;
- class C,J,S,W optional;
-```
-
-Notes:
-- `/flow-code:interview` accepts Flow IDs or spec file paths and writes refinements back
-- `/flow-code:plan` accepts new ideas or an existing Flow ID to update the plan
-
-Tip: with RP 1.5.68+, use `flowctl rp setup-review --create` to auto-open RepoPrompt windows. Alternatively, open RP on your repo beforehand for faster context loading.
-Plan review in rp mode requires `flowctl rp chat-send`; if rp-cli/windows unavailable, the review gate retries.
-
----
-
-## Features
-
-Built for reliability. These are the guardrails.
-
-**Re-anchoring prevents drift**
-
-Before EVERY task, Flow-Code re-reads the epic spec, task spec, and git state from `.flow/`. This forces Claude back to the source of truth - no hallucinated scope creep, no forgotten requirements. In Ralph mode, this happens automatically each iteration.
-
-Unlike agents that carry accumulated context (where early mistakes compound), re-anchoring gives each task a fresh, accurate starting point.
-
-### Re-anchoring
-
-Before EVERY task, Flow-Code re-reads:
-- Epic spec and task spec from `.flow/`
-- Current git status and recent commits
-- Validation state
-
-Per Anthropic's long-running agent guidance: agents must re-anchor from sources of truth to prevent drift. The reads are cheap; drift is expensive.
-
-### Multi-user Safe
-
-Teams can work in parallel branches without coordination servers:
-
-- **Merge-safe IDs**: Scans existing files to allocate the next ID. No shared counters.
-- **Soft claims**: Tasks track an `assignee` field. Prevents accidental duplicate work.
-- **Actor resolution**: Auto-detects from git email, `FLOW_ACTOR` env, or `$USER`.
-- **Local validation**: `flowctl validate --all` catches issues before commit.
-
-```bash
-# Actor A starts task
-flowctl start fn-1.1 # Sets assignee automatically
-
-# Actor B tries same task
-flowctl start fn-1.1 # Fails: "claimed by actor-a@example.com"
-flowctl start fn-1.1 --force # Override if needed
-```
-
-### Parallel Worktrees
-
-Multiple agents can work simultaneously in different git worktrees, sharing task state:
-
-```bash
-# Main repo
-git worktree add ../feature-a fn-1-branch
-git worktree add ../feature-b fn-2-branch
-
-# Both worktrees share task state via .git/flow-state/
-cd ../feature-a && flowctl start fn-1.1 # Agent A claims task
-cd ../feature-b && flowctl start fn-2.1 # Agent B claims different task
-```
-
-**How it works:**
-- Runtime state (status, assignee, evidence) lives in `.git/flow-state/` — shared across worktrees
-- Definition files (title, description, deps) stay in `.flow/` — tracked in git
-- Per-task `fcntl` locking prevents race conditions
-
-**State directory resolution:**
-1. `FLOW_STATE_DIR` env (explicit override)
-2. `git --git-common-dir` + `/flow-state` (worktree-aware)
-3. `.flow/state` fallback (non-git or old git)
-
-**Commands:**
-```bash
-flowctl state-path # Show resolved state directory
-```
-
-### Zero Dependencies
-
-Everything is bundled:
-- `flowctl.py` and the `flowctl/` package ship with the plugin
-- No external tracker CLI to install
-- No external services
-- Just Python 3
-
-### Bundled Skills
-
-Utility skills available during planning and implementation:
-
-| Skill | Use Case |
-|-------|----------|
-| `browser` | Web automation via agent-browser CLI (verify UI, scrape docs, test flows) |
-| `flow-code-rp-explorer` | Token-efficient codebase exploration via RepoPrompt |
-| `flow-code-worktree-kit` | Git worktree management for parallel work |
-| `flow-code-export-context` | Export context for external LLM review |
-
-### Non-invasive
-
-- No daemons
-- No CLAUDE.md edits
-- Delete `.flow/` to uninstall; if you enabled Ralph, also delete `scripts/ralph/`
-- Ralph uses plugin hooks for workflow enforcement (only active when `FLOW_RALPH=1`)
-
-### CI-ready
-
-```bash
-flowctl validate --all
-```
-
-Exits 1 on errors. Drop into pre-commit hooks or GitHub Actions. See `docs/ci-workflow-example.yml`.
-
-### One File Per Task
-
-Each epic and task gets its own JSON + markdown file pair. Merge conflicts are rare and easy to resolve.
-
-### Cross-Model Reviews
-
-Two models catch what one misses. Reviews use a second model (via RepoPrompt or Codex) to verify plans and implementations before they ship.
-
-**Three review types:**
-- **Plan reviews** — Verify architecture before coding starts
-- **Impl reviews** — Verify each task implementation
-- **Completion reviews** — Verify epic delivers all spec requirements before closing
-
-**Review criteria (Carmack-level, identical for both backends):**
-
-| Review Type | Criteria |
-|-------------|----------|
-| **Plan** | Completeness, Feasibility, Clarity, Architecture, Risks (incl. security), Scope, Testability |
-| **Impl** | Correctness, Simplicity, DRY, Architecture, Edge Cases, Tests, Security |
-| **Completion** | Spec compliance: all requirements delivered, docs updated, no gaps |
-
-Reviews block progress until `SHIP`. Fix → re-review cycles continue until approved.
-
-#### RepoPrompt (Recommended)
-
-[RepoPrompt](https://repoprompt.com) provides the best review experience on macOS.
-
-**Why recommended:**
-- Best-in-class context builder for reviews (full file context, smart selection)
-- Enables **context-scout** for deeper codebase discovery (alternative: repo-scout works without RP)
-- Visual diff review UI + persistent chat threads
-
-**Setup:**
-
-1. Install RepoPrompt:
- ```bash
- brew install --cask repoprompt
- ```
-
-2. **Enable MCP Server** (required for rp-cli):
- - Settings → MCP Server → Enable
- - Click "Install CLI to PATH" (creates `/usr/local/bin/rp-cli`)
- - Verify: `rp-cli --version`
-
-3. **Configure models** — RepoPrompt uses two models that must be set in the UI (not controllable via CLI):
-
- | Setting | Recommended | Purpose |
- |---------|-------------|---------|
- | **Context Builder model** | GPT-5.3 Codex Medium (via Codex CLI or OpenAI API) | Builds file selection for reviews. Needs large context window. |
- | **Chat model** | GPT-5.2 High (via Codex CLI or OpenAI API) | Runs the actual review. Needs strong reasoning. |
-
- Set these in Settings → Models. Any OpenAI API-compatible model works (Codex CLI, OpenAI API key, or other providers). These models are what make cross-model review valuable — a different model catches blind spots that self-review misses.
-
- > **Note:** When `--create` auto-opens a new workspace, it inherits your default model settings. Configure models before first use.
-
-**Usage:**
-```bash
-/flow-code:plan-review fn-1 --review=rp
-/flow-code:impl-review --review=rp
-```
-
-#### Codex (Cross-Platform Alternative)
-
-OpenAI Codex CLI works on any platform (macOS, Linux, Windows).
-
-**Why use Codex:**
-- Cross-platform (no macOS requirement)
-- Terminal-based (no GUI needed)
-- Session continuity via thread IDs
-- Same Carmack-level review criteria as RepoPrompt
-- Uses GPT 5.2 High by default when used as a review backend from Claude Code (no config needed)
-
-**Trade-off:** Uses heuristic context hints from changed files rather than RepoPrompt's intelligent file selection.
-
-> **Note:** When running Flow-Code inside Codex itself, commands use `/prompts:` prefix (e.g., `/prompts:impl-review`). The `/flow-code:` prefix below applies to Claude Code.
-
-**Setup:**
-```bash
-# Install and authenticate Codex CLI
-npm install -g @openai/codex
-codex auth
-```
-
-**Usage:**
-```bash
-/flow-code:plan-review fn-1 --review=codex
-/flow-code:impl-review --review=codex
-
-# Or via flowctl directly
-flowctl codex plan-review fn-1 --base main
-flowctl codex impl-review fn-1.3 --base main
-```
-
-**Verify installation:**
-```bash
-flowctl codex check
-```
-
-#### Configuration
-
-Set default review backend:
-```bash
-# Per-project (saved in .flow/config.json)
-flowctl config set review.backend rp # or codex, or none
-
-# Per-session (environment variable)
-export FLOW_REVIEW_BACKEND=codex
-```
-
-Priority: `--review=...` argument > `FLOW_REVIEW_BACKEND` env > `.flow/config.json` > error.
-
-**No auto-detect.** Run `/flow-code:setup` to configure your preferred review backend, or pass `--review=X` explicitly.
-
-#### Which to Choose?
-
-| Scenario | Recommendation |
-|----------|----------------|
-| macOS with GUI available | RepoPrompt (better context) |
-| Linux/Windows | Codex (only option) |
-| CI/headless environments | Codex (no GUI needed) |
-| Ralph overnight runs | Either works; RP auto-opens with --create (1.5.68+) |
-
-Without a backend configured, reviews fail with a clear error. Run `/flow-code:setup` or pass `--review=X`.
-
-### Dependency Graphs
-
-Tasks declare their blockers. `flowctl ready` shows what can start. Nothing executes until dependencies resolve.
-
-**Epic-level dependencies**: During planning, `epic-scout` runs in parallel with other research scouts to find relationships with existing open epics. If the new plan depends on APIs/patterns from another epic, dependencies are auto-set via `flowctl epic add-dep`. Findings reported at end of planning—no prompts needed.
-
-### Auto-Block Stuck Tasks
-
-After MAX_ATTEMPTS_PER_TASK failures (default 5), Ralph:
-1. Writes `block-<task>.md` with failure context
-2. Marks task blocked via `flowctl block`
-3. Moves to next task
-
-Prevents infinite retry loops. Review `block-*.md` files in the morning to understand what went wrong.
-
-### Plan-Sync (Opt-in)
-
-Synchronizes downstream task specs when implementation drifts from the original plan.
-
-**Automatic (opt-in):**
-```bash
-flowctl config set planSync.enabled true
-```
-
-When enabled, after each task completes, a plan-sync agent:
-1. Compares what was planned vs what was actually built
-2. Identifies downstream tasks that reference stale assumptions (names, APIs, data structures)
-3. Updates affected task specs with accurate info
-
-Skip conditions: disabled (default), task failed, no downstream tasks.
-
-**Cross-epic sync (opt-in, default false):**
-```bash
-flowctl config set planSync.crossEpic true
-```
-
-When enabled, plan-sync also checks other open epics for stale references. Useful when multiple epics share APIs/patterns, but increases sync time. Disabled by default to avoid long Ralph loops.
-
-**Manual trigger:**
-```bash
-/flow-code:sync fn-1.2 # Sync from specific task
-/flow-code:sync fn-1 # Scan whole epic for drift
-/flow-code:sync fn-1.2 --dry-run # Preview changes without writing
-```
-
-Manual sync ignores `planSync.enabled` config—if you run it, you want it. Works with any source task status (not just done).
-
-### Memory System (Opt-in)
-
-Persistent learnings that survive context compaction.
-
-```bash
-# Enable
-flowctl config set memory.enabled true
-flowctl memory init
-
-# Manual entries
-flowctl memory add --type pitfall "Always use flowctl rp wrappers"
-flowctl memory add --type convention "Tests in __tests__ dirs"
-flowctl memory add --type decision "SQLite over Postgres for simplicity"
-
-# Query
-flowctl memory list
-flowctl memory search "flowctl"
-flowctl memory read --type pitfalls
-```
-
-When enabled:
-- **Planning**: `memory-scout` runs in parallel with other scouts
-- **Work**: worker reads memory files directly during re-anchor
-- **Ralph**: NEEDS_WORK reviews auto-capture to `pitfalls.md`
-- **Auto-capture**: session end hook extracts decisions, discoveries, and pitfalls from transcript
-
-**Auto-memory** (on by default, zero config):
-
-Every session end, the plugin automatically extracts key learnings from the transcript:
-
-- **Default: Gemini AI summarization** — `gemini -p` analyzes the transcript and extracts decisions, discoveries, and pitfalls. Understands semantics, not just keywords.
-- **Fallback: pattern matching** — if `gemini` CLI is not available, falls back to regex extraction.
-
-No setup needed — `.flow/memory/` is auto-created on first capture. Max 5 entries per session:
-- `pitfalls.md` — bugs found, things to avoid
-- `conventions.md` — project patterns, coding conventions
-- `decisions.md` — architectural choices and rationale
-
-To disable: `flowctl config set memory.auto false`
-
-Memory retrieval works in all modes (manual, Ralph, auto-improve). Use `flowctl memory add` for manual entries.
-
-Config lives in `.flow/config.json`, separate from Ralph's `scripts/ralph/config.env`.
-
----
-
-## Commands
-
-Ten commands, complete workflow:
-
-| Command | What It Does |
-|---------|--------------|
-| `/flow-code:plan ` | Research the codebase, create epic with dependency-ordered tasks |
-| `/flow-code:work ` | Execute epic, task, or spec file, re-anchoring before each |
-| `/flow-code:interview ` | Deep interview to flesh out a spec before planning |
-| `/flow-code:plan-review ` | Carmack-level plan review via RepoPrompt |
-| `/flow-code:impl-review` | Carmack-level impl review of current branch |
-| `/flow-code:epic-review ` | Epic-completion review: verify implementation matches spec |
-| `/flow-code:debug` | Systematic debugging: root cause investigation → pattern analysis → hypothesis → fix |
-| `/flow-code:prime` | Assess codebase agent-readiness, propose fixes ([details](#agent-readiness-assessment)) |
-| `/flow-code:sync ` | Manual plan-sync: update downstream tasks after implementation drift |
-| `/flow-code:ralph-init` | Scaffold repo-local Ralph harness (`scripts/ralph/`) |
-| `/flow-code:retro` | Post-epic retrospective: what worked, what didn't, lessons → memory |
-| `/flow-code:django` | Django patterns: architecture, DRF, security, testing, verification |
-| `/flow-code:skill-create` | TDD-based skill creation: baseline test → write → bulletproof |
-| `/flow-code:setup` | Optional: install flowctl locally + add docs (for power users) |
-| `/flow-code:uninstall` | Remove flow-code from project (keeps tasks if desired) |
-
-Work accepts an epic (`fn-N`), task (`fn-N.M`), or markdown spec file (`.md`). Spec files auto-create an epic with one task.
-
-### Autonomous Mode (Flags)
-
-All commands accept flags to skip questions:
-
-```bash
-# Plan with flags
-/flow-code:plan Add caching --research=grep --no-review
-/flow-code:plan Add auth --research=rp --review=rp
-
-# Work with flags
-/flow-code:work fn-1 --branch=current --no-review
-/flow-code:work fn-1 --branch=new --review=export
-
-# Reviews with flags
-/flow-code:plan-review fn-1 --review=rp
-/flow-code:impl-review --review=export
-```
-
-Natural language also works:
-
-```bash
-/flow-code:plan Add webhooks, use context-scout, skip review
-/flow-code:work fn-1 current branch, no review
-```
-
-| Command | Available Flags |
-|---------|-----------------|
-| `/flow-code:plan` | `--research=rp\|grep`, `--depth=short\|standard\|deep`, `--review=rp\|codex\|export\|none`, `--plan-only` |
-| `/flow-code:work` | `--branch=current\|worktree\|new`, `--review=rp\|codex\|none`, `--no-review`, `--interactive`, `--tdd`, `--no-pr` |
-| `/flow-code:plan-review` | `--review=rp\|codex\|export` |
-| `/flow-code:impl-review` | `--review=rp\|codex\|export` |
-| `/flow-code:prime` | `--report-only`, `--fix-all` |
-| `/flow-code:sync` | `--dry-run` |
-
-### Command Reference
-
-Detailed input documentation for each command.
-
-#### `/flow-code:plan`
-
-```
-/flow-code:plan <idea or fn-N> [--research=rp|grep] [--review=rp|codex|export|none]
-```
-
-| Input | Description |
-|-------|-------------|
-| `` | Free-form feature description ("Add user authentication with OAuth") |
-| `fn-N` | Existing epic ID to update the plan |
-| `--research=rp` | Use RepoPrompt context-scout for deeper codebase discovery |
-| `--research=grep` | Use grep-based repo-scout (default, faster) |
-| `--review=rp\|codex\|export\|none` | Review backend after planning |
-| `--no-review` | Shorthand for `--review=none` |
-
-#### `/flow-code:work`
-
-```
-/flow-code:work [--branch=current|new|worktree] [--review=rp|codex|export|none]
-```
-
-| Input | Description |
-|-------|-------------|
-| `fn-N` | Execute entire epic (all tasks in dependency order) |
-| `fn-N.M` | Execute single task |
-| `path/to/spec.md` | Create epic from spec file, execute immediately |
-| `--branch=current` | Work on current branch |
-| `--branch=new` | Create new branch `fn-N-slug` (default) |
-| `--branch=worktree` | Create git worktree for isolated work |
-| `--review=rp\|codex\|export\|none` | Review backend after work |
-| `--no-review` | Shorthand for `--review=none` |
-
-#### `/flow-code:interview`
-
-```
-/flow-code:interview
-```
-
-| Input | Description |
-|-------|-------------|
-| `fn-N` | Interview about epic to refine requirements |
-| `fn-N.M` | Interview about specific task |
-| `path/to/spec.md` | Interview about spec file |
-| `"rough idea"` | Interview about a new idea (creates epic) |
-
-Deep questioning (40+ questions) to surface requirements, edge cases, and decisions.
-
-#### `/flow-code:plan-review`
-
-```
-/flow-code:plan-review [--review=rp|codex|export] [focus areas]
-```
-
-| Input | Description |
-|-------|-------------|
-| `fn-N` | Epic ID to review |
-| `--review=rp` | Use RepoPrompt (macOS, visual builder) |
-| `--review=codex` | Use OpenAI Codex CLI (cross-platform) |
-| `--review=export` | Export context for manual review |
-| `[focus areas]` | Optional: "focus on security" or "check API design" |
-
-Carmack-level criteria: Completeness, Feasibility, Clarity, Architecture, Risks, Scope, Testability.
-
-#### `/flow-code:impl-review`
-
-```
-/flow-code:impl-review [--review=rp|codex|export] [focus areas]
-```
-
-| Input | Description |
-|-------|-------------|
-| `--review=rp` | Use RepoPrompt (macOS, visual builder) |
-| `--review=codex` | Use OpenAI Codex CLI (cross-platform) |
-| `--review=export` | Export context for manual review |
-| `[focus areas]` | Optional: "focus on performance" or "check error handling" |
-
-Reviews current branch changes. Carmack-level criteria: Correctness, Simplicity, DRY, Architecture, Edge Cases, Tests, Security.
-
-#### `/flow-code:epic-review`
-
-```
-/flow-code:epic-review [--review=rp|codex|none]
-```
-
-| Input | Description |
-|-------|-------------|
-| `fn-N` | Epic ID to review |
-| `--review=rp` | Use RepoPrompt (macOS, visual builder) |
-| `--review=codex` | Use OpenAI Codex CLI (cross-platform) |
-| `--review=none` | Skip review |
-
-Reviews epic implementation against spec. Runs after all tasks complete. Catches requirement gaps, missing functionality, incomplete doc updates.
-
-#### `/flow-code:prime`
-
-```
-/flow-code:prime [--report-only] [--fix-all] [path]
-```
-
-| Input | Description |
-|-------|-------------|
-| (no args) | Assess current directory, interactive fixes |
-| `--report-only` | Show assessment report, skip remediation |
-| `--fix-all` | Apply all recommendations without asking |
-| `[path]` | Assess a different directory |
-
-See [Agent Readiness Assessment](#agent-readiness-assessment) for details.
-
-#### `/flow-code:sync`
-
-```
-/flow-code:sync [--dry-run]
-```
-
-| Input | Description |
-|-------|-------------|
-| `fn-N` | Sync entire epic's downstream tasks |
-| `fn-N.M` | Sync from specific task |
-| `--dry-run` | Preview changes without writing |
-
-Updates downstream task specs when implementation drifts from plan.
-
-#### `/flow-code:ralph-init`
-
-```
-/flow-code:ralph-init
-```
-
-No arguments. Scaffolds `scripts/ralph/` for autonomous operation.
-
-#### `/flow-code:setup`
-
-```
-/flow-code:setup
-```
-
-No arguments. Optional setup that:
-- Configures review backend (rp, codex, or none)
-- Copies flowctl to `.flow/bin/`
-- Adds flow-code instructions to CLAUDE.md/AGENTS.md
-
-#### `/flow-code:uninstall`
-
-```
-/flow-code:uninstall
-```
-
-No arguments. Interactive removal with option to keep tasks.
-
----
-
-## The Workflow
-
-### Defaults (manual and Ralph)
-
-Flow-Code uses the same defaults in manual and Ralph runs. Ralph bypasses prompts only.
-
-- plan: `--research=grep`
-- work: `--branch=new`
-- review: from `.flow/config.json` (set via `/flow-code:setup`), or `none` if not configured
-
-Override via flags or `scripts/ralph/config.env`.
-
-### Planning Phase
-
-1. **Research (parallel subagents)**: `repo-scout` (or `context-scout` if rp-cli) + `practice-scout` + `docs-scout` + `github-scout` + `epic-scout` + `docs-gap-scout`
-2. **Gap analysis**: `flow-gap-analyst` finds edge cases + missing requirements
-3. **Epic creation**: Writes spec to `.flow/specs/fn-N.md`, sets epic dependencies from `epic-scout` findings
-4. **Task breakdown**: Creates tasks + explicit dependencies in `.flow/tasks/`, adds doc update acceptance criteria from `docs-gap-scout`
-5. **Validate**: `flowctl validate --epic fn-N`
-6. **Review** (optional): `/flow-code:plan-review fn-N` with re-anchor + fix loop until "Ship"
-
-### Work Phase
-
-1. **Re-anchor**: Re-read epic + task specs + git state (EVERY task)
-2. **Execute**: Implement using existing patterns
-3. **Test**: Verify acceptance criteria
-4. **Record**: `flowctl done` adds summary + evidence to the task spec
-5. **Review** (optional): `/flow-code:impl-review` via RepoPrompt
-6. **Loop**: Next ready task → repeat until no ready tasks. Close epic manually (`flowctl epic close fn-N`) or let Ralph close at loop end.
-
----
-
-## Ralph Mode (Autonomous, Opt-In)
-
-Ralph is repo-local and opt-in. Files are created only by `/flow-code:ralph-init`. Remove manually with `rm -rf scripts/ralph/`.
-`/flow-code:ralph-init` also writes `scripts/ralph/.gitignore` so run logs stay out of git.
-
-What it automates (one unit per iteration, fresh context each time):
-- Selector chooses plan vs work unit (`flowctl next`)
-- Plan gate = plan review loop until Ship (if enabled)
-- Work gate = one task until pass (tests + validate + optional impl review)
- - Single run branch: all epics work on one `ralph-` branch (cherry-pick/revert friendly)
-
-Enable:
-```bash
-/flow-code:ralph-init
-./scripts/ralph/ralph_once.sh # one iteration (observe)
-./scripts/ralph/ralph.sh # full loop (AFK)
-```
-
-**Watch mode** - see what Claude is doing:
-```bash
-./scripts/ralph/ralph.sh --watch # Stream tool calls in real-time
-./scripts/ralph/ralph.sh --watch verbose # Also stream model responses
-```
-
-Run scripts from terminal (not inside Claude Code). `ralph_once.sh` runs one iteration so you can observe before going fully autonomous.
-
-### Ralph defaults vs recommended (plan review gate)
-
-`REQUIRE_PLAN_REVIEW` controls whether Ralph must pass the **plan review gate** before doing any implementation work.
-
-**Default (safe, won't stall):**
-
-* `REQUIRE_PLAN_REVIEW=0`
- Ralph can proceed to work tasks even if `rp-cli` is missing or unavailable overnight.
-
-**Recommended (best results, requires rp-cli):**
-
-* `REQUIRE_PLAN_REVIEW=1`
-* `PLAN_REVIEW=rp`
-
-This forces Ralph to run `/flow-code:plan-review` until the epic plan is approved before starting tasks.
-
-**Tip:** If you don't have `rp-cli` installed, keep `REQUIRE_PLAN_REVIEW=0` or Ralph may repeatedly select the plan gate and make no progress.
-
-Ralph verifies RepoPrompt reviews via receipt JSON files in `scripts/ralph/runs//receipts/` (plan + impl).
-
-### Ralph loop (one iteration)
-
-```mermaid
-flowchart TD
- A[ralph.sh iteration] --> B[flowctl next]
- B -->|status=plan| C[/flow-code:plan-review fn-N/]
- C -->|verdict=SHIP| D[flowctl epic set-plan-review-status=ship]
- C -->|verdict!=SHIP| A
-
- B -->|status=work| E[/flow-code:work fn-N.M/]
- E --> F[tests + validate]
- F -->|fail| A
-
- F -->|WORK_REVIEW!=none| R[/flow-code:impl-review/]
- R -->|verdict=SHIP| G[flowctl done + git commit]
- R -->|verdict!=SHIP| A
-
- F -->|WORK_REVIEW=none| G
-
- G --> A
-
- B -->|status=completion_review| CR[/flow-code:epic-review fn-N/]
- CR -->|verdict=SHIP| CRD[flowctl epic set-completion-review-status=ship]
- CR -->|verdict!=SHIP| A
- CRD --> A
-
- B -->|status=none| H[close done epics]
- H --> I[COMPLETE]
-```
-
-**YOLO safety**: YOLO mode uses `--dangerously-skip-permissions`. Use a sandbox/container and no secrets in env for unattended runs.
-
----
-
-## .flow/ Directory
-
-```
-.flow/
-├── meta.json # Schema version
-├── config.json # Project settings (memory enabled, etc.)
-├── epics/
-│ └── fn-1-add-oauth.json # Epic metadata (id, title, status, deps)
-├── specs/
-│ └── fn-1-add-oauth.md # Epic spec (plan, scope, acceptance)
-├── tasks/
-│ ├── fn-1-add-oauth.1.json # Task metadata (id, status, priority, deps, assignee)
-│ ├── fn-1-add-oauth.1.md # Task spec (description, acceptance, done summary)
-│ └── ...
-└── memory/ # Persistent learnings (opt-in)
- ├── pitfalls.md # Lessons from NEEDS_WORK reviews
- ├── conventions.md # Project patterns
- └── decisions.md # Architectural choices
-```
-
-Flowctl accepts schema v1 and v2; new fields are optional and defaulted.
-
-New fields:
-- Epic JSON: `plan_review_status`, `plan_reviewed_at`, `completion_review_status`, `completion_reviewed_at`, `depends_on_epics`, `branch_name`, `gaps`
-- Task JSON: `priority`
-
-### ID Format
-
-- **Epic**: `fn-N-slug` where `slug` is derived from the epic title (e.g., `fn-1-add-oauth`, `fn-2-fix-login-bug`)
-- **Task**: `fn-N-slug.M` (e.g., `fn-1-add-oauth.1`, `fn-2-fix-login-bug.2`)
-
-The slug is automatically generated from the epic title (lowercase, hyphens for spaces, max 40 chars). This makes IDs human-readable and self-documenting.
-
-**Backwards compatibility**: Legacy formats `fn-N` (no suffix) and `fn-N-xxx` (random 3-char suffix) are still fully supported. Existing epics don't need migration.
-
-There are no task IDs outside an epic. If you want a single task, create an epic with one task.
-
-### Separation of Concerns
-
-- **JSON files**: Metadata only (IDs, status, dependencies, assignee)
-- **Markdown files**: Narrative content (specs, descriptions, summaries)
-
----
-
-## flowctl CLI
-
-Bundled Python script for managing `.flow/`. Flow-Code's commands handle epic/task creation automatically—use `flowctl` for direct inspection, fixes, or advanced workflows:
-
-```bash
-# Setup
-flowctl init # Create .flow/ structure
-flowctl detect # Check if .flow/ exists
-
-# Epics
-flowctl epic create --title "..." # Create epic
-flowctl epic create --title "..." --branch "fn-1-epic"
-flowctl epic set-plan fn-1 --file spec.md # Set epic spec from file
-flowctl epic set-plan-review-status fn-1 --status ship
-flowctl epic close fn-1 # Close epic (requires all tasks done)
-
-# Tasks
-flowctl task create --epic fn-1 --title "..." --deps fn-1.2,fn-1.3 --priority 10
-flowctl task set-description fn-1.1 --file desc.md
-flowctl task set-acceptance fn-1.1 --file accept.md
-
-# Dependencies
-flowctl dep add fn-1.3 fn-1.2 # fn-1.3 depends on fn-1.2
-
-# Workflow
-flowctl ready --epic fn-1 # Show ready/in_progress/blocked
-flowctl next # Select next plan/work unit
-flowctl start fn-1.1 # Claim and start task
-flowctl done fn-1.1 --summary-file s.md --evidence-json e.json
-flowctl block fn-1.2 --reason-file r.md
-
-# Queries
-flowctl show fn-1 --json # Epic with all tasks
-flowctl cat fn-1 # Print epic spec
-
-# Validation
-flowctl validate --epic fn-1 # Validate single epic
-flowctl validate --all # Validate everything (for CI)
-
-# Review helpers
-flowctl rp chat-send --window W --tab T --message-file m.md
-flowctl prep-chat --message-file m.md --selected-paths a.ts b.ts -o payload.json
-```
-
-📖 **[Full CLI reference](docs/flowctl.md)**
-🤖 **[Ralph deep dive](docs/ralph.md)**
-
----
-
-## Task Completion
-
-When a task completes, `flowctl done` appends structured data to the task spec:
-
-### Done Summary
-
-```markdown
-## Done summary
-
-- Added ContactForm component with Zod validation
-- Integrated with server action for submission
-- All tests passing
-
-Follow-ups:
-- Consider rate limiting (out of scope)
-```
-
-### Evidence
-
-```markdown
-## Evidence
-
-- Commits: a3f21b9
-- Tests: bun test
-- PRs:
-```
-
-This creates a complete audit trail: what was planned, what was done, how it was verified.
-
----
-
-## Flow vs Flow-Code
-
-| | Flow | Flow-Code |
-|:--|:--|:--|
-| **Task tracking** | External tracker or standalone plan files | `.flow/` directory (bundled flowctl) |
-| **Install** | Plugin + optional external tracker | Plugin only |
-| **Artifacts** | Standalone plan files | `.flow/specs/` and `.flow/tasks/` |
-| **Config edits** | External config edits (if using tracker) | None |
-| **Multi-user** | Via external tracker | Built-in (scan-based IDs, soft claims) |
-| **Uninstall** | Remove plugin + external tracker config | Delete `.flow/` (and `scripts/ralph/` if enabled) |
-
-**Choose Flow-Code if you want:**
-- Zero external dependencies
-- No config file edits
-- Clean uninstall (delete `.flow/`, and `scripts/ralph/` if enabled)
-- Built-in multi-user safety
-
-**Choose Flow if you:**
-- Already use an external tracker for issue tracking
-- Want plan files as standalone artifacts
-- Need full issue management features
-
----
-
-## Requirements
-
-- Python 3.8+
-- git
-- Optional: [RepoPrompt](https://repoprompt.com) for macOS GUI reviews + enables **context-scout** (deeper codebase discovery than repo-scout). Reviews work without it via Codex backend.
-- Optional: OpenAI Codex CLI (`npm install -g @openai/codex`) for cross-platform terminal-based reviews
-
-Without a review backend, reviews are skipped.
-
----
-
-## Development
-
-```bash
-claude --plugin-dir ./plugins/flow-code
-```
-
----
-
-## Other Platforms
-
-### Factory Droid (Native Support)
-
-Flow-Code works natively in [Factory Droid](https://factory.ai) — no modifications needed.
-
-**Install:**
-```bash
-# In Droid CLI
-/plugin marketplace add https://github.com/z23cc/flow-code
-/plugin install flow-code
-```
-
-**Cross-platform patterns used:**
-- Skills use `${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}` bash fallback
-- Hooks use `Bash|Execute` regex matcher (Claude Code = Bash, Droid = Execute)
-- Agents use `disallowedTools` blacklist (not `tools` whitelist — tool names differ between platforms)
-
-**Caveats:**
-- Subagents may behave differently (Droid's Task tool implementation)
-- Hook timing may vary slightly
-
-> **Rollback:** If you experience issues, downgrade to v0.20.9 (last pre-Droid version): `claude plugins install flow-code@0.20.9`
-
-### OpenAI Codex
-
-Flow-Code works in OpenAI Codex with near-parity to Claude Code. The install script converts Claude Code's plugin system to Codex's multi-agent roles, prompts, and config.
-
-**Key difference:** Commands use the `/prompts:` prefix in Codex instead of `/flow-code:`:
-
-| Claude Code | Codex |
-|-------------|-------|
-| `/flow-code:plan` | `/prompts:plan` |
-| `/flow-code:work` | `/prompts:work` |
-| `/flow-code:impl-review` | `/prompts:impl-review` |
-| `/flow-code:plan-review` | `/prompts:plan-review` |
-| `/flow-code:epic-review` | `/prompts:epic-review` |
-| `/flow-code:interview` | `/prompts:interview` |
-| `/flow-code:prime` | `/prompts:prime` |
-| `/flow-code:ralph-init` | `/prompts:ralph-init` |
-
-**What works:**
-- Planning, work execution, interviews, reviews — full workflow
-- Multi-agent roles: 20 agents run as parallel Codex threads (up to 12 concurrent)
-- Cross-model reviews (Codex as review backend)
-- flowctl CLI
-
-**Model mapping (3-tier):**
-
-| Tier | Codex Model | Agents | Reasoning |
-|------|-------------|--------|-----------|
-| Intelligent | `gpt-5.4` | quality-auditor, flow-gap-analyst, context-scout | high |
-| Smart scouts | `gpt-5.4` | epic-scout, agents-md-scout, docs-gap-scout | high |
-| Fast scouts | `gpt-5.3-codex-spark` | build, env, testing, tooling, observability, security, workflow, memory scouts | skipped |
-| Inherited | parent model | worker, plan-sync | parent |
-
-Smart scouts (epic-scout, agents-md-scout, docs-gap-scout) need deeper reasoning for context building and analysis. The remaining 8 scanning scouts run on Spark for speed — they check for file presence and patterns without needing multi-step reasoning.
-
-Override model defaults:
-```bash
-CODEX_MODEL_INTELLIGENT=gpt-5.4 \
-CODEX_MODEL_FAST=gpt-5.3-codex-spark \
-CODEX_REASONING_EFFORT=high \
-CODEX_MAX_THREADS=12 \
-./scripts/install-codex.sh flow-code
-```
-
-**Caveats:**
-- `/prompts:setup` not supported — use manual project setup below
-- Ralph autonomous mode not supported — requires plugin hooks (guard hooks, receipt gating) which Codex doesn't support
-- `/prompts:ralph-init` scaffolds files but the loop won't enforce workflow rules without hooks
-- `claude-md-scout` is auto-renamed to `agents-md-scout` (CLAUDE.md → AGENTS.md patching)
-
-**Install:**
-```bash
-# Clone and install (one-time)
-git clone https://github.com/z23cc/flow-code.git
-cd flow-code
-./scripts/install-codex.sh
-```
-
-> The script copies skills/agents/prompts to `~/.codex/` and flowctl to `~/.flow/bin/`. Add `export PATH="$HOME/.flow/bin:$PATH"` to your shell profile.
-
-**Per-project setup** (run in each project):
-```bash
-# Initialize .flow/ directory
-flowctl init
-
-# Optional: configure review backend (codex recommended for Codex CLI)
-flowctl config set review.backend codex
-```
-
-**Optional AGENTS.md snippet** (helps Codex understand flow-code):
-```markdown
-
-## Flow-Code
-
-This project uses Flow-Code for task tracking. `flowctl` must be in PATH (`~/.flow/bin/`).
-
-Quick commands:
-- `flowctl list` — list epics + tasks
-- `flowctl ready --epic fn-N` — what's ready
-- `flowctl start fn-N.M` — claim task
-- `flowctl done fn-N.M --summary-file s.md --evidence-json e.json`
-
-Prompts (use `/prompts:`):
-- `/prompts:plan` — create a build plan
-- `/prompts:work` — execute tasks
-- `/prompts:impl-review` — implementation review
-- `/prompts:interview` — refine specs interactively
-
-```
+MIT License. See [LICENSE](LICENSE) for details.
---
diff --git a/docs/skill-anatomy.md b/docs/skill-anatomy.md
index 0a8e6b1c..62329918 100644
--- a/docs/skill-anatomy.md
+++ b/docs/skill-anatomy.md
@@ -21,10 +21,32 @@ Prefix all skill directories with `flow-code-`. Main file is always `SKILL.md` (
---
name: flow-code-
description: Use when [triggering conditions and symptoms only]
+# --- Optional fields (all backward-compatible) ---
+# allowed-tools: # Tools permitted while the skill is active (allowlist, not denylist)
+# - Bash
+# - Read
+# - Edit
+# - Glob
+# - Grep
+# version: 1.0.0 # SemVer skill version
+# model: sonnet # LLM model override (haiku|sonnet|opus|inherit)
+# preamble-tier: 1 # Startup cost: 1=none, 2=light, 3=heavy
+# voice-triggers: # Speech-to-text aliases for discovery
+# - "flow plan"
+# - "make a plan"
+# user-invocable: false # Hide from / menu (background knowledge only)
+# argument-hint: "" # Autocomplete hint shown in / menu
+# context: fork # Run in isolated subagent context
+# agent: Explore # Subagent type when context: fork
+# effort: medium # Model reasoning effort (low|medium|high|max)
+# hooks: { pre-tool-call: ... } # Lifecycle hooks scoped to this skill
+# paths: "*.rs,*.toml" # Glob patterns limiting auto-activation
+# shell: bash # Shell for DCI blocks (bash|powershell)
---
```
-**Rules:**
+### Required Fields
+
- `name`: Lowercase, hyphen-separated. Must match directory name. Always starts with `flow-code-`.
- `description`: Starts with "Use when...". Max 500 characters. Third person.
- Include: triggering conditions, symptoms, contexts.
@@ -32,6 +54,28 @@ description: Use when [triggering conditions and symptoms only]
**Why:** Descriptions are injected into system prompts for skill discovery. If the description contains process steps, agents follow the summary and skip the actual skill content.
+### Optional Fields Reference
+
+All optional fields are backward-compatible. Omitting them changes nothing for existing skills.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `allowed-tools` | list | Tool allowlist — tools permitted without prompts when skill is active. Valid: `Read`, `Write`, `Edit`, `Bash`, `Glob`, `Grep`, `WebFetch`, `WebSearch`, `Task`, `TodoWrite`, `NotebookEdit`, `AskUserQuestion`, `Skill`. Bash supports patterns: `Bash(cargo:*)` |
+| `version` | string | SemVer version (e.g., `1.0.0`). Useful for tracking skill evolution and coordinating updates across plugins |
+| `model` | string | LLM model override: `haiku`, `sonnet`, `opus`, or `inherit` (default). Use sparingly — most skills should inherit the session model |
+| `preamble-tier` | integer | Startup cost indicator: `1` = no preamble (instant), `2` = light preamble (env detection), `3` = heavy preamble (network, builds). Helps agents estimate activation cost |
+| `voice-triggers` | list | Speech-to-text aliases for discovery. Handles common STT misheard variants (e.g., `"flow plan"` for `/flow-code:plan`). Listed in the description at render time |
+| `user-invocable` | boolean | Set `false` to hide from `/` menu. Skill becomes background knowledge only, intended for agent preloading |
+| `argument-hint` | string | Autocomplete hint shown in the `/` menu (e.g., `[epic-id]`, `<task-id>`) |
+| `context` | string | Set to `fork` to run the skill in an isolated subagent context |
+| `agent` | string | Subagent type when `context: fork` is set (default: `general-purpose`) |
+| `effort` | string | Override model reasoning effort: `low`, `medium`, `high`, `max` |
+| `hooks` | object | Lifecycle hooks scoped to this skill (pre-tool-call, post-tool-call, etc.) |
+| `paths` | string/list | Glob patterns limiting auto-activation. Accepts comma-separated string or YAML list |
+| `shell` | string | Shell for DCI (`` !`command` ``) blocks: `bash` (default) or `powershell` |
+
+**Key difference from agents:** Skills use `allowed-tools` (allowlist) while agents use `disallowedTools` (denylist). Both `effort` and `maxTurns` originated as agent-only fields; `effort` is now available for skills as well, whereas `maxTurns` is not among the skill fields listed above.
+
## Required Sections
```markdown
@@ -161,6 +205,7 @@ The `flow-code-debug` skill (`skills/flow-code-debug/SKILL.md`) is the reference
- [ ] Directory created as `skills/flow-code-/`
- [ ] SKILL.md has valid YAML frontmatter with `name` and `description`
- [ ] Description starts with "Use when..." (no workflow summary)
+- [ ] Optional fields (if used) are valid: `allowed-tools` lists real tools, `version` is SemVer, `model` is a known alias
- [ ] All six required sections present (Overview, When to Use, Core Process, Common Rationalizations, Red Flags, Verification)
- [ ] Rationalizations table has 3+ entries with factual rebuttals
- [ ] Red flags list has observable symptoms (not vague advice)
diff --git a/scripts/gen-platform.sh b/scripts/gen-platform.sh
new file mode 100755
index 00000000..bacf2ba1
--- /dev/null
+++ b/scripts/gen-platform.sh
@@ -0,0 +1,455 @@
+#!/usr/bin/env bash
+# gen-platform.sh — Generate cross-platform adapter files for flow-code.
+# Usage: bash scripts/gen-platform.sh
+# Idempotent: safe to re-run, overwrites cleanly.
+# Dependencies: bash, jq
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+CLAUDE_MD="$PLUGIN_ROOT/CLAUDE.md"
+PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json"
+
+# --- helpers ---
+
+die() { printf 'Error: %s\n' "$1" >&2; exit 1; }
+
+require_file() {
+ [ -f "$1" ] || die "Required file not found: $1"
+}
+
+ensure_dir() {
+ mkdir -p "$1"
+}
+
+plugin_name() {
+ jq -r '.name // "flow-code"' "$PLUGIN_JSON"
+}
+
+plugin_version() {
+ jq -r '.version // "unknown"' "$PLUGIN_JSON"
+}
+
+plugin_description() {
+ jq -r '.description // ""' "$PLUGIN_JSON"
+}
+
+# Parse the leading YAML frontmatter of a markdown file and print a single
+# "name<TAB>description" line on stdout.
+#   $1 — path to the markdown file
+#   $2 — fallback name, used when the frontmatter carries no `name:` value
+# Only single-line `name:` / `description:` entries are understood. One leading
+# and one trailing double quote, then single quote, are stripped from each value.
+# Shared by collect_skills, collect_agents and collect_commands so the three
+# listings cannot drift apart.
+frontmatter_entry() {
+  local md_file="$1" fallback="$2"
+  local name="" desc="" in_frontmatter=0 line key value
+  local field_re='^(name|description): *(.*)$'
+
+  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
+  while IFS= read -r line || [ -n "$line" ]; do
+    line="${line%$'\r'}" # tolerate CRLF line endings
+    if [[ "$line" == "---" ]]; then
+      # The first delimiter opens the frontmatter, the second one closes it.
+      if [[ "$in_frontmatter" -eq 1 ]]; then
+        break
+      fi
+      in_frontmatter=1
+      continue
+    fi
+    if [[ "$in_frontmatter" -eq 1 && "$line" =~ $field_re ]]; then
+      key="${BASH_REMATCH[1]}"
+      value="${BASH_REMATCH[2]}"
+      # Strip quotes
+      value="${value#\"}"
+      value="${value%\"}"
+      value="${value#\'}"
+      value="${value%\'}"
+      if [[ "$key" == "name" ]]; then
+        name="$value"
+      else
+        desc="$value"
+      fi
+    fi
+  done < "$md_file"
+
+  [ -n "$name" ] || name="$fallback"
+  printf '%s\t%s\n' "$name" "$desc"
+}
+
+# Collect skill names and descriptions from SKILL.md frontmatter.
+# Outputs: name<TAB>description, one line per skills/*/SKILL.md found.
+# The directory name is used when the frontmatter has no name.
+collect_skills() {
+  local skills_dir="$PLUGIN_ROOT/skills"
+  local skill_dir skill_md
+  [ -d "$skills_dir" ] || return 0
+  for skill_dir in "$skills_dir"/*/; do
+    # The glob result already ends in "/", so append the file name directly.
+    skill_md="${skill_dir}SKILL.md"
+    [ -f "$skill_md" ] || continue
+    frontmatter_entry "$skill_md" "$(basename "$skill_dir")"
+  done
+}
+
+# Collect agent names and descriptions from agent .md frontmatter.
+# Outputs: name<TAB>description, one line per agents/*.md found.
+# The file name without ".md" is used when the frontmatter has no name.
+collect_agents() {
+  local agents_dir="$PLUGIN_ROOT/agents"
+  local agent_md
+  [ -d "$agents_dir" ] || return 0
+  for agent_md in "$agents_dir"/*.md; do
+    [ -f "$agent_md" ] || continue
+    frontmatter_entry "$agent_md" "$(basename "$agent_md" .md)"
+  done
+}
+
+# Collect command names and descriptions from command .md files.
+# Outputs: name<TAB>description, one line per commands/flow-code/*.md found.
+# The file name without ".md" is used when the frontmatter has no name.
+collect_commands() {
+  local cmds_dir="$PLUGIN_ROOT/commands/flow-code"
+  local cmd_md
+  [ -d "$cmds_dir" ] || return 0
+  for cmd_md in "$cmds_dir"/*.md; do
+    [ -f "$cmd_md" ] || continue
+    frontmatter_entry "$cmd_md" "$(basename "$cmd_md" .md)"
+  done
+}
+
+# --- codex target ---
+
+generate_codex() {
+ local out_dir="$PLUGIN_ROOT/.codex"
+ ensure_dir "$out_dir"
+
+ local name version desc
+ name="$(plugin_name)"
+ version="$(plugin_version)"
+ desc="$(plugin_description)"
+
+ # Build AGENTS.md
+ {
+ cat < Auto-generated by \`scripts/gen-platform.sh codex\`. Safe to re-run.
+> Source plugin: $name v$version
+
+## Overview
+
+$desc
+
+## Setup
+
+This directory was generated from a Claude Code plugin. The canonical source of truth
+is \`CLAUDE.md\` at the repository root. Key differences for Codex:
+
+- **flowctl binary**: The task engine lives at \`bin/flowctl\`. Invoke it directly:
+ \`\`\`bash
+ ./bin/flowctl
+ \`\`\`
+- **No slash commands**: Codex does not support \`/plugin:command\` syntax. Use the
+ skill descriptions below to understand available capabilities.
+- **No hooks**: Codex does not support lifecycle hooks. Guard checks must be run manually.
+
+## Primary Workflow
+
+1. Plan: Create an epic with tasks — \`./bin/flowctl epic create "description"\`
+2. Work: Pick next task — \`./bin/flowctl next\`
+3. Complete: Mark done with evidence — \`./bin/flowctl done --summary-file --evidence-json \`
+
+## Architecture
+
+\`\`\`
+CLAUDE.md → Project instructions (read this first)
+bin/flowctl → Rust binary — single source of truth for .flow/ state
+commands/flow-code/ → Slash command definitions (Claude Code specific)
+skills/*/SKILL.md → Skill implementations (reusable across platforms)
+agents/*.md → Subagent definitions
+\`\`\`
+
+HEADER
+
+ # Commands section
+ printf '## Commands\n\n'
+ printf 'These map to Claude Code slash commands (`/flow-code:`).\n'
+ printf 'In Codex, invoke the underlying skill or flowctl command directly.\n\n'
+ printf '| Command | Description |\n'
+ printf '|---------|-------------|\n'
+ while IFS=$'\t' read -r cname cdesc; do
+ printf '| %s | %s |\n' "$cname" "$cdesc"
+ done < <(collect_commands)
+ printf '\n'
+
+ # Skills section
+ printf '## Skills\n\n'
+ printf 'Skills are loaded by the Skill tool in Claude Code. In Codex, read the\n'
+ printf 'corresponding `skills//SKILL.md` for detailed instructions.\n\n'
+ printf '| Skill | Description |\n'
+ printf '|-------|-------------|\n'
+ while IFS=$'\t' read -r sname sdesc; do
+ printf '| %s | %s |\n' "$sname" "$sdesc"
+ done < <(collect_skills)
+ printf '\n'
+
+ # Agents section
+ printf '## Agents\n\n'
+ printf 'Agents are spawned as subagents in Claude Code. In Codex, read the\n'
+ printf 'corresponding `agents/.md` for instructions.\n\n'
+ printf '| Agent | Description |\n'
+ printf '|-------|-------------|\n'
+ while IFS=$'\t' read -r aname adesc; do
+ printf '| %s | %s |\n' "$aname" "$adesc"
+ done < <(collect_agents)
+ printf '\n'
+
+ # Key design decisions — excerpt from CLAUDE.md
+ printf '## Key Design Decisions\n\n'
+ printf 'See `CLAUDE.md` for the full list. Highlights:\n\n'
+ printf -- '- flowctl outputs JSON (`--json` flag) for machine consumption\n'
+ printf -- '- Tasks follow `todo -> in_progress -> done` state machine\n'
+ printf -- '- Evidence-based completion: `flowctl done` requires `--summary-file` and `--evidence-json`\n'
+ printf -- '- File locking prevents concurrent edits in Teams mode\n'
+ printf -- '- Three-layer quality: guard (lint/test) + plan-review + adversarial review\n'
+
+ } > "$out_dir/AGENTS.md"
+
+ printf 'Generated %s\n' "$out_dir/AGENTS.md"
+}
+
+# --- cursor target ---
+
+generate_cursor() {
+ local out_dir="$PLUGIN_ROOT/.cursor/rules"
+ ensure_dir "$out_dir"
+
+ local name version
+ name="$(plugin_name)"
+ version="$(plugin_version)"
+
+ # Rule 1: Project overview from CLAUDE.md
+ {
+ cat <<'RULE_HEADER'
+---
+description: Flow-code project overview and architecture
+globs:
+alwaysApply: true
+---
+
+RULE_HEADER
+ # Include CLAUDE.md content directly
+ cat "$CLAUDE_MD"
+ } > "$out_dir/flow-code-overview.mdc"
+ printf 'Generated %s\n' "$out_dir/flow-code-overview.mdc"
+
+ # Rule 2: flowctl usage
+ {
+ cat <<'RULE_HEADER'
+---
+description: How to use the flowctl CLI for task management
+globs:
+ - "flowctl/**"
+ - ".flow/**"
+ - "scripts/*.sh"
+alwaysApply: false
+---
+
+RULE_HEADER
+ cat <<'BODY'
+# flowctl CLI Reference
+
+flowctl is the Rust binary that manages `.flow/` state. Always invoke from the repo root:
+
+```bash
+./bin/flowctl
+```
+
+## Common Commands
+
+- `epic create "description"` — Create a new epic
+- `epic list` — List all epics
+- `tasks --epic ` — List tasks for an epic
+- `next [--epic ]` — Get the next ready task
+- `start ` — Mark task as in-progress
+- `done --summary-file --evidence-json ` — Complete a task
+- `restart ` — Reset a task and cascade to dependents
+- `task skip --reason "why"` — Skip a task
+- `task split --titles "A|B|C" --chain` — Split into sub-tasks
+- `lock --task --files ` — Acquire file locks
+- `unlock --task ` — Release file locks
+- `guard` — Run lint/type/test checks
+- `codex adversarial --base main` — Adversarial review via Codex
+- `status --interrupted` — Check for unfinished work
+
+## State Machine
+
+Tasks: `todo` -> `in_progress` -> `done` (with `blocked` and `skipped` side-states)
+
+## Output
+
+Use `--json` for machine-readable JSON output.
+BODY
+ } > "$out_dir/flow-code-flowctl.mdc"
+ printf 'Generated %s\n' "$out_dir/flow-code-flowctl.mdc"
+
+ # Rule 3: Skills index
+ {
+ cat <<'RULE_HEADER'
+---
+description: Flow-code available skills and their purposes
+globs:
+ - "skills/**"
+alwaysApply: false
+---
+
+RULE_HEADER
+ printf '# Flow-code Skills\n\n'
+ printf 'Each skill lives in `skills//SKILL.md`. Read the SKILL.md for full instructions.\n\n'
+ printf '| Skill | Description |\n'
+ printf '|-------|-------------|\n'
+ while IFS=$'\t' read -r sname sdesc; do
+ printf '| %s | %s |\n' "$sname" "$sdesc"
+ done < <(collect_skills)
+ } > "$out_dir/flow-code-skills.mdc"
+ printf 'Generated %s\n' "$out_dir/flow-code-skills.mdc"
+
+ # Rule 4: Agents index
+ {
+ cat <<'RULE_HEADER'
+---
+description: Flow-code available agents and their purposes
+globs:
+ - "agents/**"
+alwaysApply: false
+---
+
+RULE_HEADER
+ printf '# Flow-code Agents\n\n'
+ printf 'Each agent lives in `agents/.md`. Read the file for full instructions.\n\n'
+ printf '| Agent | Description |\n'
+ printf '|-------|-------------|\n'
+ while IFS=$'\t' read -r aname adesc; do
+ printf '| %s | %s |\n' "$aname" "$adesc"
+ done < <(collect_agents)
+ } > "$out_dir/flow-code-agents.mdc"
+ printf 'Generated %s\n' "$out_dir/flow-code-agents.mdc"
+
+ # Rule 5: Code quality
+ {
+ cat <<'RULE_HEADER'
+---
+description: Code quality and testing rules for flow-code
+globs:
+ - "flowctl/**"
+ - "scripts/**"
+ - "hooks/**"
+alwaysApply: false
+---
+
+RULE_HEADER
+ cat <<'BODY'
+# Code Quality Rules
+
+## Build and Test
+
+```bash
+cd flowctl && cargo build --release && cargo test --all
+```
+
+## Validation
+
+```bash
+python3 -c "import json; json.load(open('hooks/hooks.json'))"
+```
+
+## Testing
+
+```bash
+bash scripts/smoke_test.sh # flowctl core
+bash scripts/ci_test.sh # full CI
+bash scripts/teams_e2e_test.sh # Teams file locking
+```
+
+All tests create temp directories and clean up. Must NOT run from the plugin repo root.
+
+## Files to Never Commit
+
+- `ref/` — reference/backup repos
+- `*.upstream` — upstream backup files
+- `.tasks/` — runtime state
+- `__pycache__/` — Python cache
+- `.flow/` — per-project task state
+BODY
+ } > "$out_dir/flow-code-quality.mdc"
+ printf 'Generated %s\n' "$out_dir/flow-code-quality.mdc"
+}
+
+# --- main ---
+
+main() {
+ if [ $# -lt 1 ]; then
+ cat >&2 <
+
+Targets:
+ codex Generate .codex/ directory with AGENTS.md
+ cursor Generate .cursor/rules/ with .mdc rules files
+USAGE
+ exit 1
+ fi
+
+ require_file "$CLAUDE_MD"
+ require_file "$PLUGIN_JSON"
+
+ case "$1" in
+ codex)
+ generate_codex
+ ;;
+ cursor)
+ generate_cursor
+ ;;
+ *)
+ die "Unknown target: $1 (expected 'codex' or 'cursor')"
+ ;;
+ esac
+
+ printf 'Done.\n'
+}
+
+main "$@"
diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh
new file mode 100755
index 00000000..94db9b38
--- /dev/null
+++ b/scripts/setup-hooks.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# setup-hooks.sh — Install git hooks by symlinking from scripts/ to the repo's hooks directory
+#
+# Installs scripts/pre-commit.sh as the pre-commit hook. An existing symlink is
+# replaced; an existing regular file is first moved to pre-commit.bak.
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+
+# -e rather than -d: in linked worktrees and submodules .git is a regular file
+# pointing at the real git directory, and those checkouts are valid targets too.
+if [[ ! -e "$ROOT/.git" ]]; then
+  echo "ERROR: not a git repository (no .git/ at $ROOT)" >&2
+  exit 1
+fi
+
+# Ask git where hooks live instead of assuming $ROOT/.git/hooks, which does not
+# exist when .git is a file. Fall back to the conventional location if git is
+# unavailable or too old to support --git-path.
+# NOTE(review): git may also apply a configured core.hooksPath here — confirm
+# that installing into that directory is the desired behavior.
+if ! HOOKS_DIR="$(git -C "$ROOT" rev-parse --git-path hooks 2>/dev/null)" || [[ -z "$HOOKS_DIR" ]]; then
+  HOOKS_DIR=".git/hooks"
+fi
+# git can answer with a path relative to $ROOT; anchor it so the script works
+# from any current directory.
+[[ "$HOOKS_DIR" == /* ]] || HOOKS_DIR="$ROOT/$HOOKS_DIR"
+
+mkdir -p "$HOOKS_DIR"
+
+# Symlink pre-commit hook
+SOURCE="$ROOT/scripts/pre-commit.sh"
+TARGET="$HOOKS_DIR/pre-commit"
+
+if [[ ! -f "$SOURCE" ]]; then
+  echo "ERROR: $SOURCE not found" >&2
+  exit 1
+fi
+
+# -L is tested first so a dangling symlink is replaced rather than ignored.
+if [[ -L "$TARGET" ]]; then
+  echo "pre-commit hook already symlinked, updating..."
+  rm "$TARGET"
+elif [[ -f "$TARGET" ]]; then
+  echo "WARNING: existing pre-commit hook found, backing up to pre-commit.bak"
+  mv "$TARGET" "$TARGET.bak"
+fi
+
+ln -s "$SOURCE" "$TARGET"
+# chmod follows the symlink, so this marks scripts/pre-commit.sh executable.
+chmod +x "$TARGET"
+echo "Installed pre-commit hook: $TARGET -> $SOURCE"
diff --git a/scripts/validate-skills.sh b/scripts/validate-skills.sh
new file mode 100755
index 00000000..20a47d29
--- /dev/null
+++ b/scripts/validate-skills.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+# validate-skills.sh — validate SKILL.md files against skill-anatomy.md schema
+# Exit 0 = all pass, 1 = failures found
+set -o errexit -o nounset -o pipefail
+
+# The plugin root is the parent of the directory that holds this script.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+SKILLS_DIR="${PLUGIN_ROOT}/skills"
+
+# --- Validation rules ---
+# Frontmatter keys every skill must define (space-separated).
+REQUIRED_FM_FIELDS="name description"
+# Recommended "## " headings from docs/skill-anatomy.md (|-separated).
+REQUIRED_SECTIONS="Overview|When to Use|Core Process|Common Rationalizations|Red Flags|Verification"
+# Files longer than this many lines get a warning.
+MAX_LINES=500
+
+# --- Running tallies, updated by validate_skill and printed in the summary ---
+total=0
+pass_count=0
+fail_count=0
+warn_count=0
+
+# Print the value of the first "<key>:" line of a frontmatter text, with every
+# single- and double-quote character removed. Prints nothing if the key is absent.
+#   $1 = frontmatter key (e.g. name)
+#   $2 = frontmatter text
+_skill_fm_value() {
+  local key="$1" text="$2" value
+  # `sed -n ...p` exits 0 when nothing matches, so an absent key cannot fail the
+  # pipeline under `set -e -o pipefail` (a non-matching grep here would abort
+  # the whole script before the skill is reported).
+  value="$(sed -n "s/^${key}:[[:space:]]*//p" <<<"$text" | tr -d "\"'")"
+  # Keep only the first matching line.
+  value=${value%%$'\n'*}
+  printf '%s' "$value"
+}
+
+# Validate one SKILL.md and print a PASS/FAIL line followed by its findings.
+#   $1 = path to a SKILL.md; its parent directory name identifies the skill
+# Updates the global counters total, pass_count, fail_count and warn_count.
+# Errors (skill FAILs): missing frontmatter, empty frontmatter, missing
+# required frontmatter field.
+# Warnings (skill still PASSes): name differs from directory, description does
+# not start with "Use when", recommended section missing, file over MAX_LINES.
+validate_skill() {
+  local skill_file="$1"
+  local skill_dir
+  skill_dir="$(basename "$(dirname "$skill_file")")"
+  local errors="" warnings=""
+  local field section
+
+  # Every code path below counts the skill exactly once.
+  total=$((total + 1))
+
+  # --- Check YAML frontmatter exists ---
+  local first_line
+  first_line="$(head -n 1 "$skill_file")"
+  if [[ "$first_line" != "---" ]]; then
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    echo " ✗ missing YAML frontmatter"
+    return
+  fi
+
+  # Extract frontmatter (the lines between the first and second ---)
+  local fm
+  fm="$(awk 'BEGIN{n=0} /^---$/{n++; if(n==2) exit; next} n==1{print}' "$skill_file")"
+  if [[ -z "$fm" ]]; then
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    echo " ✗ empty or malformed YAML frontmatter"
+    return
+  fi
+
+  # --- Check required frontmatter fields ---
+  # Here-strings instead of `echo | grep -q`: with pipefail, grep -q exiting
+  # early can make the writer fail and turn a match into a false "missing".
+  for field in $REQUIRED_FM_FIELDS; do
+    if ! grep -qE "^${field}:" <<<"$fm"; then
+      errors+=" ✗ missing frontmatter field: ${field}"$'\n'
+    fi
+  done
+
+  # --- Check name field value ---
+  local name_val
+  name_val="$(_skill_fm_value name "$fm")"
+  if [[ -n "$name_val" && "$name_val" != "$skill_dir" ]]; then
+    warnings+=" ⚠ name '${name_val}' does not match directory '${skill_dir}'"$'\n'
+  fi
+
+  # --- Check description starts with "Use when" ---
+  local desc_val
+  desc_val="$(_skill_fm_value description "$fm")"
+  if [[ -n "$desc_val" && "$desc_val" != "Use when"* ]]; then
+    warnings+=" ⚠ description should start with 'Use when...'"$'\n'
+  fi
+
+  # --- Check required ## sections (warn, not fail) ---
+  # Body = everything after the second ---; later --- lines are dropped too.
+  local body
+  body="$(awk 'BEGIN{n=0} /^---$/{n++; next} n>=2{print}' "$skill_file")"
+
+  # Split on | for this one read only, leaving IFS untouched for the rest of
+  # the function.
+  local -a sections
+  IFS='|' read -ra sections <<<"$REQUIRED_SECTIONS"
+  for section in "${sections[@]}"; do
+    if ! grep -qiE "^## .*${section}" <<<"$body"; then
+      warnings+=" ⚠ missing recommended section: ## ${section}"$'\n'
+    fi
+  done
+
+  # --- File size warning ---
+  local line_count
+  line_count="$(wc -l < "$skill_file" | tr -d ' ')"
+  if [[ "$line_count" -gt "$MAX_LINES" ]]; then
+    warnings+=" ⚠ ${line_count} lines (>${MAX_LINES} recommended max)"$'\n'
+  fi
+
+  # --- Print result ---
+  if [[ -n "$errors" ]]; then
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    printf '%s' "$errors"
+  else
+    pass_count=$((pass_count + 1))
+    echo "PASS ${skill_dir}"
+  fi
+
+  if [[ -n "$warnings" ]]; then
+    local warning_total
+    # grep -c exits 1 on a zero count; `|| true` keeps set -e from firing.
+    warning_total="$(grep -c '⚠' <<<"$warnings" || true)"
+    warn_count=$((warn_count + warning_total))
+    printf '%s' "$warnings"
+  fi
+}
+
+# --- Main ---
+# Validate each skills/<dir>/SKILL.md, print a summary, and exit 1 if any failed.
+printf '%s\n' "Validating skills in ${SKILLS_DIR}/" "---"
+
+for candidate in "$SKILLS_DIR"/*/SKILL.md; do
+  # An unmatched glob stays literal; the -f test skips it.
+  if [[ -f "$candidate" ]]; then
+    validate_skill "$candidate"
+  fi
+done
+
+printf '%s\n' "---" "Total: ${total} Pass: ${pass_count} Fail: ${fail_count} Warnings: ${warn_count}"
+
+if (( fail_count > 0 )); then
+  exit 1
+fi
+exit 0
diff --git a/skills/flow-code-brainstorm/SKILL.md b/skills/flow-code-brainstorm/SKILL.md
index 676bfe30..17bca786 100644
--- a/skills/flow-code-brainstorm/SKILL.md
+++ b/skills/flow-code-brainstorm/SKILL.md
@@ -2,6 +2,21 @@
name: flow-code-brainstorm
description: "Use when exploring requirements before planning. Pressure-tests ideas, generates approaches, and outputs a requirements doc for /flow-code:plan."
user-invocable: false
+version: 1.0.0
+preamble-tier: 2
+allowed-tools:
+ - Bash
+ - Read
+ - Write
+ - Edit
+ - Glob
+ - Grep
+ - AskUserQuestion
+ - WebSearch
+voice-triggers:
+ - "brainstorm"
+ - "explore this idea"
+ - "think through this"
---
# Flow brainstorm
diff --git a/skills/flow-code-plan/SKILL.md b/skills/flow-code-plan/SKILL.md
index cb237b15..3d4c6aaf 100644
--- a/skills/flow-code-plan/SKILL.md
+++ b/skills/flow-code-plan/SKILL.md
@@ -2,6 +2,21 @@
name: flow-code-plan
description: "Use when planning features or designing implementation. Triggers on /flow-code:plan with text descriptions or Flow IDs."
user-invocable: false
+version: 1.0.0
+preamble-tier: 2
+allowed-tools:
+ - Bash
+ - Read
+ - Write
+ - Edit
+ - Glob
+ - Grep
+ - AskUserQuestion
+ - Skill
+voice-triggers:
+ - "flow plan"
+ - "make a plan"
+ - "plan this"
---
# Flow plan
diff --git a/skills/flow-code-work/SKILL.md b/skills/flow-code-work/SKILL.md
index 3df07e43..7937b7a2 100644
--- a/skills/flow-code-work/SKILL.md
+++ b/skills/flow-code-work/SKILL.md
@@ -2,6 +2,22 @@
name: flow-code-work
description: "Use when implementing a plan or working through a spec. Triggers on /flow-code:work with Flow IDs."
user-invocable: false
+version: 1.0.0
+preamble-tier: 2
+allowed-tools:
+ - Bash
+ - Read
+ - Write
+ - Edit
+ - Glob
+ - Grep
+ - AskUserQuestion
+ - Skill
+ - Task
+voice-triggers:
+ - "flow work"
+ - "start working"
+ - "execute the plan"
---
# Flow work